diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 446af973..0f1bf1a3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,6 +2,11 @@ default_language_version: python: python3 repos: +- repo: https://github.com/MarcoGorelli/absolufy-imports + rev: v0.3.1 + hooks: + - id: absolufy-imports + name: absolufy-imports - repo: https://github.com/asottile/pyupgrade rev: v2.37.3 hooks: diff --git a/README.rst b/README.rst index e418e1fa..8880fb2b 100644 --- a/README.rst +++ b/README.rst @@ -63,7 +63,7 @@ For a detailed description of each ECA&D index, please visit: https://www.ecad.e .. Pytest Coverage Comment:Begin -.. |coverage| image:: https://img.shields.io/badge/Coverage-93%25-brightgreen.svg +.. |coverage| image:: https://img.shields.io/badge/Coverage-92%25-brightgreen.svg :target: https://github.com/cerfacs-globc/icclim/blob/master/README.rst#code-coverage :alt: Code coverage diff --git a/doc/source/references/custom_indices.rst b/doc/source/references/custom_indices.rst index f4845283..6e433fe2 100644 --- a/doc/source/references/custom_indices.rst +++ b/doc/source/references/custom_indices.rst @@ -33,31 +33,31 @@ In icclim documentation we usually call them custom indices or user indices. ~~~~~~~~~~~~~~~~~~~~~~~~~ ``user_index`` is a dictionary with possible keys: -+--------------------------+-------------------------------------------+--------------------------------------------------------------------------------------+ -|Key |Type of value |Description | -+==========================+===========================================+======================================================================================+ ++------------------------+-------------------------------------------+--------------------------------------------------------------------------------------+ +|Key |Type of value |Description | ++========================+===========================================+======================================================================================+ |index_name |*str* |Name of custom index. | -+--------------------------+-------------------------------------------+--------------------------------------------------------------------------------------+ ++------------------------+-------------------------------------------+--------------------------------------------------------------------------------------+ |calc_operation |*str* |Type of calculation. See below for more details. | -+--------------------------+-------------------------------------------+--------------------------------------------------------------------------------------+ -|logical_operation |*str* |gt, lt, get, let or e | -+--------------------------+-------------------------------------------+--------------------------------------------------------------------------------------+ ++------------------------+-------------------------------------------+--------------------------------------------------------------------------------------+ +|logical_operation |*str* |gt, lt, get, let or e | ++------------------------+-------------------------------------------+--------------------------------------------------------------------------------------+ |thresh |*float* or *str* |In case of percentile-based index, must be string which starts with "p" (e.g. "p90"). 
| -+--------------------------+-------------------------------------------+--------------------------------------------------------------------------------------+ -|link_logical_operations |*str* |and or or | -+--------------------------+-------------------------------------------+--------------------------------------------------------------------------------------+ -|extreme_mode |*str* |min or max for computing min or max of running mean/sum. | -+--------------------------+-------------------------------------------+--------------------------------------------------------------------------------------+ ++------------------------+-------------------------------------------+--------------------------------------------------------------------------------------+ +|link_logical_operations |*str* |and or or | ++------------------------+-------------------------------------------+--------------------------------------------------------------------------------------+ +|extreme_mode |*str* |min or max for computing min or max of running mean/sum. | ++------------------------+-------------------------------------------+--------------------------------------------------------------------------------------+ |window_width |*int* |Used for computing running mean/sum. | -+--------------------------+-------------------------------------------+--------------------------------------------------------------------------------------+ ++------------------------+-------------------------------------------+--------------------------------------------------------------------------------------+ |coef |*float* |Constant for multiplying input data array. | -+--------------------------+-------------------------------------------+--------------------------------------------------------------------------------------+ ++------------------------+-------------------------------------------+--------------------------------------------------------------------------------------+ |date_event |*bool* |To keep or not the date of event. See below for more details. | -+--------------------------+-------------------------------------------+--------------------------------------------------------------------------------------+ ++------------------------+-------------------------------------------+--------------------------------------------------------------------------------------+ |var_type |*str* |"t" or "p". See below for more details. | -+--------------------------+-------------------------------------------+--------------------------------------------------------------------------------------+ ++------------------------+-------------------------------------------+--------------------------------------------------------------------------------------+ |ref_time_range |[*datetime.datetime*, *datetime.datetime*] |Time range of reference (baseline) period for computing anomalies. | -+--------------------------+-------------------------------------------+--------------------------------------------------------------------------------------+ ++------------------------+-------------------------------------------+--------------------------------------------------------------------------------------+ Additional information about ``user_index`` keys are given below. 
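To make the keys above concrete, here is a minimal, illustrative sketch of a ``user_index`` counting the yearly number of days with tasmax strictly above 25 (units of the input data). The key names come from the table above; ``nb_events`` is one of the ``calc_operation`` values documented further down, and the exact call signature may vary between icclim versions, so treat this as a sketch rather than a reference::

    import icclim

    # Count events: days where tasmax is strictly greater than the threshold.
    user_index = {
        "index_name": "my_summer_days",
        "calc_operation": "nb_events",  # see the calc_operation table below
        "logical_operation": "gt",      # strictly greater than
        "thresh": 25,                   # plain float threshold; use "p90"-style strings for percentiles
    }
    icclim.index(user_index=user_index, in_files="tasmax.nc", var_name="tasmax", slice_mode="year")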
@@ -66,7 +66,7 @@ calc_operation key ++++++++++++++++++ ======================================= =========================================================================== -value description +Value Description ======================================= =========================================================================== ``max`` maximum ``min`` minimum diff --git a/doc/source/references/icclim_index_api.rst b/doc/source/references/icclim_index_api.rst index 04f98690..cd9890d8 100644 --- a/doc/source/references/icclim_index_api.rst +++ b/doc/source/references/icclim_index_api.rst @@ -351,6 +351,8 @@ in *icclim* as ``hyndman_fan`` interpolation, also known as type 8. Percentile-based indices (TX10p, TX90p, TN10p, TN90p, TG10p, TG90p, R75p, R95p and R99p) could be returned as number of days (default) or as percentage of days (``out_unit`` = "%"). +.. _custom_indices_old: + Custom indices -------------- diff --git a/doc/source/references/release_notes.rst b/doc/source/references/release_notes.rst index 5b111efc..14d350df 100644 --- a/doc/source/references/release_notes.rst +++ b/doc/source/references/release_notes.rst @@ -1,6 +1,25 @@ Release history =============== +6.0 (unreleased) +---------------- +* [enh] Add generic indices +* [enh] Make the in_files.var.threshold and threshold parameters work with string values (a value with a unit or a percentile stamp) +* [maint] **BREAKING CHANGE:** ECAD indices are no longer configurable! Use a generic index instead. +* [fix] **BREAKING CHANGE:** ECAD indices CW, CD, WW, WD were computing the precipitation percentiles on day of year values where they should have been percentiles of the whole period (excluding dry days). This has been fixed. +* [maint] icclim no longer carries a version of the clix-meta yml file. Previously it was used to generate the doc string and some metadata of ECAD indices. It's no longer needed as these metadata are now part of the StandardIndex declarations. +* [maint] **BREAKING CHANGE:** Removed the `clipped_season` option from `slice_mode`. With generic indices, `season` should work with every index. +In particular, spell-based indices (e.g. wsdi) are mapped to the `max_consecutive_occurrence` generic indicator, which computes the spell length before doing the resampling operation. +So a spell that starts and ends outside the output frequency interval is properly accounted for over its whole duration. +For example, with `slice_mode="month"` and a spell starting in January and ending in March, the whole spell length is reported in the January results. +* [maint] **BREAKING CHANGE:** User index `max_nb_consecutive_events` is also mapped to `max_consecutive_occurrence`; consequently, spells are also counted for their whole duration. +* [enh] Make it possible to pass a simple dictionary in `in_files`, merging together basic `in_files` and `var_name` features. +It looks like `in_files={"tasmax": "tasmax.nc", "tasmin": "tasmin.zarr"}` (see the sketch below). +* [enh] Add `min_spell_length` parameter to index API in order to control the minimum duration of spells in `sum_of_spell_lengths`. +* [enh] Add `rolling_window_width` parameter to index API in order to control the width of the rolling window in `max|min_of_rolling_sum|average`. +* [enh] Add `doy_window_width` parameter to index API in order to control the width of aggregation windows when computing doy percentiles. +* [maint] Deprecate the `window_width` parameter. When filled, it is still mapped to `doy_window_width`.
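As a quick illustration of the enhancements above, here is a hedged sketch of a 6.0-style call combining the dictionary form of `in_files`, a string `threshold`, and the `max_consecutive_occurrence` generic indicator mentioned in these notes (the argument spelling is an assumption based on these notes, not a verified signature)::

    import icclim

    # `in_files` dictionary: variable name -> netCDF file or zarr store.
    result = icclim.index(
        index_name="max_consecutive_occurrence",  # generic indicator named above
        in_files={"tasmax": "tasmax.nc"},
        threshold="> 25 degree_Celsius",  # string threshold: a value with a unit
        slice_mode="year",
    )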
+ 5.4.0 ----- * [fix] When giving input as a list of netcdf files, the coordinate values are now merged using the `override` strategy, thus the first file with a given dimension define this dimension for all the files. diff --git a/icclim/__init__.py b/icclim/__init__.py index e8bd560c..6a4ccb8b 100644 --- a/icclim/__init__.py +++ b/icclim/__init__.py @@ -1,8 +1,7 @@ # keep imports below to expose api in `icclim` namespace +from icclim._generated_api import * # noqa +from icclim.main import index, indice, indices # noqa from icclim.models.constants import ICCLIM_VERSION - -from ._generated_api import * # noqa -from .main import index, indice, indices # noqa -from .pre_processing.rechunk import create_optimized_zarr_store # noqa +from icclim.pre_processing.rechunk import create_optimized_zarr_store # noqa __version__ = ICCLIM_VERSION diff --git a/icclim/_generated_api.py b/icclim/_generated_api.py index d8374028..1e460d61 100644 --- a/icclim/_generated_api.py +++ b/icclim/_generated_api.py @@ -3,19 +3,22 @@ To modify these, edit the extractor tool in `tools/extract-icclim-funs.py`. This module exposes each climate index as individual functions for convenience. """ +# flake8: noqa E501 from __future__ import annotations import datetime +from typing import Sequence from xarray.core.dataset import Dataset import icclim from icclim.icclim_logger import Verbosity -from icclim.models.frequency import Frequency, SliceMode +from icclim.icclim_types import InFileLike, SamplingMethodLike +from icclim.models.frequency import Frequency, FrequencyLike from icclim.models.netcdf_version import NetcdfVersion from icclim.models.quantile_interpolation import QuantileInterpolation +from icclim.models.threshold import Threshold from icclim.models.user_index_dict import UserIndexDict -from icclim.pre_processing.input_parsing import InFileType __all__ = [ "tg", @@ -72,14 +75,15 @@ def tg( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, ignore_Feb29th: bool = False, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - logs_verbosity: Verbosity | str = Verbosity.LOW, + netcdf_version: str | NetcdfVersion = "NETCDF4", + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ TG: Mean of daily mean temperature @@ -88,15 +92,16 @@ def tg( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -105,26 +110,29 @@ def tg( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. 
- time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True, the date of the event (such as when a maximum is reached) will be + stored in coordinate variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -140,18 +148,21 @@ def tg( ignore_Feb29th=ignore_Feb29th, netcdf_version=netcdf_version, logs_verbosity=logs_verbosity, + date_event=date_event, + out_unit="degree_Celsius", ) def tn( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, ignore_Feb29th: bool = False, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - logs_verbosity: Verbosity | str = Verbosity.LOW, + netcdf_version: str | NetcdfVersion = "NETCDF4", + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ TN: Mean of daily minimum temperature @@ -160,15 +171,16 @@ def tn( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -177,26 +189,29 @@ def tn( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting.
If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True, the date of the event (such as when a maximum is reached) will be + stored in coordinate variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -212,18 +227,21 @@ def tn( ignore_Feb29th=ignore_Feb29th, netcdf_version=netcdf_version, logs_verbosity=logs_verbosity, + date_event=date_event, + out_unit="degree_Celsius", ) def tx( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, ignore_Feb29th: bool = False, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - logs_verbosity: Verbosity | str = Verbosity.LOW, + netcdf_version: str | NetcdfVersion = "NETCDF4", + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ TX: Mean of daily maximum temperature @@ -232,15 +250,16 @@ def tx( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -249,26 +268,29 @@ def tx( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``.
- out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True, the date of the event (such as when a maximum is reached) will be + stored in coordinate variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -284,18 +306,21 @@ def tx( ignore_Feb29th=ignore_Feb29th, netcdf_version=netcdf_version, logs_verbosity=logs_verbosity, + date_event=date_event, + out_unit="degree_Celsius", ) def dtr( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, ignore_Feb29th: bool = False, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - logs_verbosity: Verbosity | str = Verbosity.LOW, + netcdf_version: str | NetcdfVersion = "NETCDF4", + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ DTR: Mean Diurnal Temperature Range @@ -304,15 +329,16 @@ def dtr( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -321,26 +347,29 @@ def dtr( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored.
Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True, the date of the event (such as when a maximum is reached) will be + stored in coordinate variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -356,18 +385,21 @@ def dtr( ignore_Feb29th=ignore_Feb29th, netcdf_version=netcdf_version, logs_verbosity=logs_verbosity, + date_event=date_event, + out_unit="degree_Celsius", ) def etr( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, ignore_Feb29th: bool = False, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - logs_verbosity: Verbosity | str = Verbosity.LOW, + netcdf_version: str | NetcdfVersion = "NETCDF4", + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ ETR: Intra-period extreme temperature range @@ -376,15 +408,16 @@ def etr( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -393,26 +426,29 @@ def etr( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it!
- ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True, the date of the event (such as when a maximum is reached) will be + stored in coordinate variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -428,18 +464,21 @@ def etr( ignore_Feb29th=ignore_Feb29th, netcdf_version=netcdf_version, logs_verbosity=logs_verbosity, + date_event=date_event, + out_unit="degree_Celsius", ) def vdtr( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, ignore_Feb29th: bool = False, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - logs_verbosity: Verbosity | str = Verbosity.LOW, + netcdf_version: str | NetcdfVersion = "NETCDF4", + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ vDTR: Mean day-to-day variation in Diurnal Temperature Range @@ -448,15 +487,16 @@ def vdtr( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -465,26 +505,29 @@ def vdtr( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False).
- netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True, the date of the event (such as when a maximum is reached) will be + stored in coordinate variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -500,19 +543,21 @@ def vdtr( ignore_Feb29th=ignore_Feb29th, netcdf_version=netcdf_version, logs_verbosity=logs_verbosity, + date_event=date_event, + out_unit="degree_Celsius", ) def su( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, - threshold: float | list[float] | None = None, ignore_Feb29th: bool = False, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - logs_verbosity: Verbosity | str = Verbosity.LOW, + netcdf_version: str | NetcdfVersion = "NETCDF4", + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ SU: Number of Summer Days (Tmax > 25C) @@ -521,15 +566,16 @@ def su( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -538,31 +584,29 @@ def su( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - threshold : float | list[float] | None - ``optional`` User defined threshold for certain indices. - Default depend on the index, see their individual definition. - When a list of threshold is provided, the index will be computed for each - thresholds.
- ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True, the date of the event (such as when a maximum is reached) will be + stored in coordinate variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -575,23 +619,27 @@ def su( slice_mode=slice_mode, time_range=time_range, out_file=out_file, - threshold=threshold, ignore_Feb29th=ignore_Feb29th, netcdf_version=netcdf_version, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=Threshold( + query="> 25 degree_Celsius", + ), + out_unit="day", ) def tr( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, - threshold: float | list[float] | None = None, ignore_Feb29th: bool = False, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - logs_verbosity: Verbosity | str = Verbosity.LOW, + netcdf_version: str | NetcdfVersion = "NETCDF4", + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ TR: Number of Tropical Nights (Tmin > 20C) @@ -600,15 +648,16 @@ def tr( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -617,31 +666,29 @@ def tr( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it!
- threshold : float | list[float] | None - ``optional`` User defined threshold for certain indices. - Default depend on the index, see their individual definition. - When a list of threshold is provided, the index will be computed for each - thresholds. - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True, the date of the event (such as when a maximum is reached) will be + stored in coordinate variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -654,30 +701,31 @@ def tr( slice_mode=slice_mode, time_range=time_range, out_file=out_file, - threshold=threshold, ignore_Feb29th=ignore_Feb29th, netcdf_version=netcdf_version, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=Threshold( + query="> 20 degree_Celsius", + ), + out_unit="day", ) def wsdi( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, - threshold: float | list[float] | None = None, - base_period_time_range: list[datetime] | list[str] | tuple[str, str] | None = None, - window_width: int = 5, + base_period_time_range: Sequence[datetime] | Sequence[str] | None = None, only_leap_years: bool = False, ignore_Feb29th: bool = False, - interpolation: str - | QuantileInterpolation - | None = QuantileInterpolation.MEDIAN_UNBIASED, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - save_percentile: bool = False, - logs_verbosity: Verbosity | str = Verbosity.LOW, + interpolation: str | QuantileInterpolation = "median_unbiased", + netcdf_version: str | NetcdfVersion = "NETCDF4", + save_thresholds: bool = False, + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ WSDI: Warm-spell duration index (days) @@ -686,15 +734,16 @@ def wsdi( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -703,57 +752,53 @@ def wsdi( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details.
- time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - threshold : float | list[float] | None - ``optional`` User defined threshold for certain indices. - Default depend on the index, see their individual definition. - When a list of threshold is provided, the index will be computed for each - thresholds. - base_period_time_range : list[datetime ] | list[str] | tuple[str, str] | None - ``optional`` Temporal range of the reference period on which percentiles are - computed. + base_period_time_range: list[datetime ] | list[str] | tuple[str, str] | None + ``optional`` Temporal range of the reference period. + The dates can either be given as instances of datetime.datetime or as string + values. + It is used either: + #. to compute percentiles, if a threshold is filled. When missing, the studied period is used to compute percentiles. The study period is either the dataset filtered by `time_range` or the whole - dataset if `time_range` is None. - On temperature based indices relying on percentiles (TX90p, WSDI...), the + dataset if `time_range` is missing. + For day-of-year percentiles (doy_per) of extreme values, the overlapping period between `base_period_time_range` and the study period is bootstrapped. - On indices not relying on percentiles, this parameter is ignored. - The dates can either be given as instance of datetime.datetime or as string - values. - For strings, many format are accepted. - window_width : int - ``optional`` User defined window width for related indices (default: 5). - Ignored for non related indices. - only_leap_years : bool + #. to compute a reference period for indices such as difference_of_mean + (a.k.a. anomaly) if a single variable is given in input. + only_leap_years: bool ``optional`` Option for February 29th (default: False). - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - interpolation : str | QuantileInterpolation | None + interpolation: str | QuantileInterpolation | None ``optional`` Interpolation method to compute percentile values: - ``{"linear", "hyndman_fan"}`` - Default is "hyndman_fan", a.k.a type 8 or method 8. + ``{"linear", "median_unbiased"}`` + Default is "median_unbiased", a.k.a. type 8 or method 8. Ignored for non percentile based indices. - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - save_percentile : bool - ``optional`` True if the percentiles should be saved within the resulting netcdf + save_thresholds: bool + ``optional`` True if the thresholds should be saved within the resulting netcdf file (default: False). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is.
Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True the date of the event (such as when a maximum is reached) will be + stored in coordinates variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -766,36 +811,39 @@ def wsdi( slice_mode=slice_mode, time_range=time_range, out_file=out_file, - threshold=threshold, base_period_time_range=base_period_time_range, - window_width=window_width, only_leap_years=only_leap_years, ignore_Feb29th=ignore_Feb29th, interpolation=interpolation, netcdf_version=netcdf_version, - save_percentile=save_percentile, + save_thresholds=save_thresholds, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=Threshold( + query="> 90 doy_per", + doy_window_width=5, + only_leap_years=only_leap_years, + interpolation=interpolation, + reference_period=base_period_time_range, + ), + out_unit="day", ) def tg90p( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, - threshold: float | list[float] | None = None, - base_period_time_range: list[datetime] | list[str] | tuple[str, str] | None = None, - window_width: int = 5, + base_period_time_range: Sequence[datetime] | Sequence[str] | None = None, only_leap_years: bool = False, ignore_Feb29th: bool = False, - interpolation: str - | QuantileInterpolation - | None = QuantileInterpolation.MEDIAN_UNBIASED, - out_unit: str | None = None, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - save_percentile: bool = False, - logs_verbosity: Verbosity | str = Verbosity.LOW, + interpolation: str | QuantileInterpolation = "median_unbiased", + netcdf_version: str | NetcdfVersion = "NETCDF4", + save_thresholds: bool = False, + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ TG90p: Days when Tmean > 90th percentile @@ -804,15 +852,16 @@ def tg90p( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -821,59 +870,53 @@ def tg90p( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. 
Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - threshold : float | list[float] | None - ``optional`` User defined threshold for certain indices. - Default depend on the index, see their individual definition. - When a list of threshold is provided, the index will be computed for each - thresholds. - base_period_time_range : list[datetime ] | list[str] | tuple[str, str] | None - ``optional`` Temporal range of the reference period on which percentiles are - computed. + base_period_time_range: list[datetime ] | list[str] | tuple[str, str] | None + ``optional`` Temporal range of the reference period. + The dates can either be given as instances of datetime.datetime or as string + values. + It is used either: + #. to compute percentiles, if a threshold is filled. When missing, the studied period is used to compute percentiles. The study period is either the dataset filtered by `time_range` or the whole - dataset if `time_range` is None. - On temperature based indices relying on percentiles (TX90p, WSDI...), the + dataset if `time_range` is missing. + For day-of-year percentiles (doy_per) of extreme values, the overlapping period between `base_period_time_range` and the study period is bootstrapped. - On indices not relying on percentiles, this parameter is ignored. - The dates can either be given as instance of datetime.datetime or as string - values. - For strings, many format are accepted. - window_width : int - ``optional`` User defined window width for related indices (default: 5). - Ignored for non related indices. - only_leap_years : bool + #. to compute a reference period for indices such as difference_of_mean + (a.k.a. anomaly) if a single variable is given in input. + only_leap_years: bool ``optional`` Option for February 29th (default: False). - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - interpolation : str | QuantileInterpolation | None + interpolation: str | QuantileInterpolation | None ``optional`` Interpolation method to compute percentile values: - ``{"linear", "hyndman_fan"}`` - Default is "hyndman_fan", a.k.a type 8 or method 8. + ``{"linear", "median_unbiased"}`` + Default is "median_unbiased", a.k.a. type 8 or method 8. Ignored for non percentile based indices. - out_unit : str | None - ``optional`` Output unit for certain indices: "days" or "%" (default: "days"). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - save_percentile : bool - ``optional`` True if the percentiles should be saved within the resulting netcdf + save_thresholds: bool + ``optional`` True if the thresholds should be saved within the resulting netcdf file (default: False). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True, the date of the event (such as when a maximum is reached) will be + stored in coordinate variables. + **warning** This option may significantly slow down computation.
Notes ----- This function has been auto-generated. @@ -886,37 +929,39 @@ def tg90p( slice_mode=slice_mode, time_range=time_range, out_file=out_file, - threshold=threshold, base_period_time_range=base_period_time_range, - window_width=window_width, only_leap_years=only_leap_years, ignore_Feb29th=ignore_Feb29th, interpolation=interpolation, - out_unit=out_unit, netcdf_version=netcdf_version, - save_percentile=save_percentile, + save_thresholds=save_thresholds, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=Threshold( + query="> 90 doy_per", + doy_window_width=5, + only_leap_years=only_leap_years, + interpolation=interpolation, + reference_period=base_period_time_range, + ), + out_unit="day", ) def tn90p( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, - threshold: float | list[float] | None = None, - base_period_time_range: list[datetime] | list[str] | tuple[str, str] | None = None, - window_width: int = 5, + base_period_time_range: Sequence[datetime] | Sequence[str] | None = None, only_leap_years: bool = False, ignore_Feb29th: bool = False, - interpolation: str - | QuantileInterpolation - | None = QuantileInterpolation.MEDIAN_UNBIASED, - out_unit: str | None = None, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - save_percentile: bool = False, - logs_verbosity: Verbosity | str = Verbosity.LOW, + interpolation: str | QuantileInterpolation = "median_unbiased", + netcdf_version: str | NetcdfVersion = "NETCDF4", + save_thresholds: bool = False, + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ TN90p: Days when Tmin > 90th percentile @@ -925,15 +970,16 @@ def tn90p( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -942,59 +988,53 @@ def tn90p( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. 
Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - threshold : float | list[float] | None - ``optional`` User defined threshold for certain indices. - Default depend on the index, see their individual definition. - When a list of threshold is provided, the index will be computed for each - thresholds. - base_period_time_range : list[datetime ] | list[str] | tuple[str, str] | None - ``optional`` Temporal range of the reference period on which percentiles are - computed. + base_period_time_range: list[datetime ] | list[str] | tuple[str, str] | None + ``optional`` Temporal range of the reference period. + The dates can either be given as instances of datetime.datetime or as string + values. + It is used either: + #. to compute percentiles, if a threshold is filled. When missing, the studied period is used to compute percentiles. The study period is either the dataset filtered by `time_range` or the whole - dataset if `time_range` is None. - On temperature based indices relying on percentiles (TX90p, WSDI...), the + dataset if `time_range` is missing. + For day-of-year percentiles (doy_per) of extreme values, the overlapping period between `base_period_time_range` and the study period is bootstrapped. - On indices not relying on percentiles, this parameter is ignored. - The dates can either be given as instance of datetime.datetime or as string - values. - For strings, many format are accepted. - window_width : int - ``optional`` User defined window width for related indices (default: 5). - Ignored for non related indices. - only_leap_years : bool + #. to compute a reference period for indices such as difference_of_mean + (a.k.a. anomaly) if a single variable is given in input. + only_leap_years: bool ``optional`` Option for February 29th (default: False). - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - interpolation : str | QuantileInterpolation | None + interpolation: str | QuantileInterpolation | None ``optional`` Interpolation method to compute percentile values: - ``{"linear", "hyndman_fan"}`` - Default is "hyndman_fan", a.k.a type 8 or method 8. + ``{"linear", "median_unbiased"}`` + Default is "median_unbiased", a.k.a. type 8 or method 8. Ignored for non percentile based indices. - out_unit : str | None - ``optional`` Output unit for certain indices: "days" or "%" (default: "days"). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - save_percentile : bool - ``optional`` True if the percentiles should be saved within the resulting netcdf + save_thresholds: bool + ``optional`` True if the thresholds should be saved within the resulting netcdf file (default: False). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True, the date of the event (such as when a maximum is reached) will be + stored in coordinate variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated.
@@ -1007,37 +1047,39 @@ def tn90p( slice_mode=slice_mode, time_range=time_range, out_file=out_file, - threshold=threshold, base_period_time_range=base_period_time_range, - window_width=window_width, only_leap_years=only_leap_years, ignore_Feb29th=ignore_Feb29th, interpolation=interpolation, - out_unit=out_unit, netcdf_version=netcdf_version, - save_percentile=save_percentile, + save_thresholds=save_thresholds, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=Threshold( + query="> 90 doy_per", + doy_window_width=5, + only_leap_years=only_leap_years, + interpolation=interpolation, + reference_period=base_period_time_range, + ), + out_unit="day", ) def tx90p( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, - threshold: float | list[float] | None = None, - base_period_time_range: list[datetime] | list[str] | tuple[str, str] | None = None, - window_width: int = 5, + base_period_time_range: Sequence[datetime] | Sequence[str] | None = None, only_leap_years: bool = False, ignore_Feb29th: bool = False, - interpolation: str - | QuantileInterpolation - | None = QuantileInterpolation.MEDIAN_UNBIASED, - out_unit: str | None = None, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - save_percentile: bool = False, - logs_verbosity: Verbosity | str = Verbosity.LOW, + interpolation: str | QuantileInterpolation = "median_unbiased", + netcdf_version: str | NetcdfVersion = "NETCDF4", + save_thresholds: bool = False, + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ TX90p: Days when Tmax > 90th daily percentile @@ -1046,15 +1088,16 @@ def tx90p( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -1063,59 +1106,53 @@ def tx90p( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. 
         Use the function returned value instead to retrieve the computed value.
         If ``out_file`` already exists, icclim will overwrite it!
-    threshold : float | list[float] | None
-        ``optional`` User defined threshold for certain indices.
-        Default depend on the index, see their individual definition.
-        When a list of threshold is provided, the index will be computed for each
-        thresholds.
-    base_period_time_range : list[datetime ] | list[str] | tuple[str, str] | None
-        ``optional`` Temporal range of the reference period on which percentiles are
-        computed.
+    base_period_time_range: list[datetime ] | list[str] | tuple[str, str] | None
+        ``optional`` Temporal range of the reference period.
+        The dates can either be given as instance of datetime.datetime or as string
+        values.
+        It is used either:
+        #. to compute percentiles if threshold is filled.
         When missing, the studied period is used to compute percentiles.
         The study period is either the dataset filtered by `time_range` or the whole
-        dataset if `time_range` is None.
-        On temperature based indices relying on percentiles (TX90p, WSDI...), the
+        dataset if `time_range` is missing.
+        For day of year percentiles (doy_per), on extreme percentiles the
         overlapping period between `base_period_time_range` and the study period is
         bootstrapped.
-        On indices not relying on percentiles, this parameter is ignored.
-        The dates can either be given as instance of datetime.datetime or as string
-        values.
-        For strings, many format are accepted.
-    window_width : int
-        ``optional`` User defined window width for related indices (default: 5).
-        Ignored for non related indices.
-    only_leap_years : bool
+        #. to compute a reference period for indices such as difference_of_mean
+        (a.k.a anomaly) if a single variable is given in input.
+    only_leap_years: bool
         ``optional`` Option for February 29th (default: False).
-    ignore_Feb29th : bool
+    ignore_Feb29th: bool
         ``optional`` Ignoring or not February 29th (default: False).
-    interpolation : str | QuantileInterpolation | None
+    interpolation: str | QuantileInterpolation | None
         ``optional`` Interpolation method to compute percentile values:
-        ``{"linear", "hyndman_fan"}``
-        Default is "hyndman_fan", a.k.a type 8 or method 8.
+        ``{"linear", "median_unbiased"}``
+        Default is "median_unbiased", a.k.a type 8 or method 8.
         Ignored for non percentile based indices.
-    out_unit : str | None
-        ``optional`` Output unit for certain indices: "days" or "%" (default: "days").
-    netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion
+    netcdf_version: str | NetcdfVersion
         ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC").
-    save_percentile : bool
-        ``optional`` True if the percentiles should be saved within the resulting netcdf
+    save_thresholds: bool
+        ``optional`` True if the thresholds should be saved within the resulting netcdf
         file (default: False).
-    logs_verbosity : str | Verbosity
+    logs_verbosity: str | Verbosity
         ``optional`` Configure how verbose icclim is.
         Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW")
-
+    date_event: bool
+        When True the date of the event (such as when a maximum is reached) will be
+        stored in coordinates variables.
+        **warning** This option may significantly slow down computation.

     Notes
     -----
     This function has been auto-generated.
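The generated bodies in this patch all funnel their configuration into `Threshold` query strings: an optional comparison operator, a value, and a unit (`doy_per` for day-of-year percentiles). Collected in one place as a sketch; the calls mirror the hunks verbatim, but the import path is an assumption since the patch does not show the module's imports:

from icclim.models.threshold import Threshold  # import path assumed

Threshold(query="> 90 doy_per", doy_window_width=5)  # tg90p/tn90p above, tx90p below
Threshold(query="> 25 degree_Celsius")               # csu below: fixed scalar threshold
Threshold(query="< 0 degree_Celsius")                # fd, cfd, id below: frost/ice days
Threshold(query="4 degree_Celsius")                  # gd4 below: no operator, degree-days reference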
@@ -1128,28 +1165,35 @@ def tx90p( slice_mode=slice_mode, time_range=time_range, out_file=out_file, - threshold=threshold, base_period_time_range=base_period_time_range, - window_width=window_width, only_leap_years=only_leap_years, ignore_Feb29th=ignore_Feb29th, interpolation=interpolation, - out_unit=out_unit, netcdf_version=netcdf_version, - save_percentile=save_percentile, + save_thresholds=save_thresholds, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=Threshold( + query="> 90 doy_per", + doy_window_width=5, + only_leap_years=only_leap_years, + interpolation=interpolation, + reference_period=base_period_time_range, + ), + out_unit="day", ) def txx( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, ignore_Feb29th: bool = False, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - logs_verbosity: Verbosity | str = Verbosity.LOW, + netcdf_version: str | NetcdfVersion = "NETCDF4", + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ TXx: Maximum daily maximum temperature @@ -1158,15 +1202,16 @@ def txx( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -1175,26 +1220,29 @@ def txx( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. 
Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True the date of the event (such as when a maximum is reached) will be + stored in coordinates variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -1210,18 +1258,21 @@ def txx( ignore_Feb29th=ignore_Feb29th, netcdf_version=netcdf_version, logs_verbosity=logs_verbosity, + date_event=date_event, + out_unit="degree_Celsius", ) def tnx( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, ignore_Feb29th: bool = False, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - logs_verbosity: Verbosity | str = Verbosity.LOW, + netcdf_version: str | NetcdfVersion = "NETCDF4", + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ TNx: Maximum daily minimum temperature @@ -1230,15 +1281,16 @@ def tnx( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -1247,26 +1299,29 @@ def tnx( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True the date of the event (such as when a maximum is reached) will be + stored in coordinates variables. 
+ **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -1282,19 +1337,21 @@ def tnx( ignore_Feb29th=ignore_Feb29th, netcdf_version=netcdf_version, logs_verbosity=logs_verbosity, + date_event=date_event, + out_unit="degree_Celsius", ) def csu( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, - threshold: float | list[float] | None = None, ignore_Feb29th: bool = False, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - logs_verbosity: Verbosity | str = Verbosity.LOW, + netcdf_version: str | NetcdfVersion = "NETCDF4", + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ CSU: Maximum number of consecutive summer days (Tmax >25 C) @@ -1303,15 +1360,16 @@ def csu( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -1320,31 +1378,29 @@ def csu( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - threshold : float | list[float] | None - ``optional`` User defined threshold for certain indices. - Default depend on the index, see their individual definition. - When a list of threshold is provided, the index will be computed for each - thresholds. - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. 
Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True the date of the event (such as when a maximum is reached) will be + stored in coordinates variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -1357,23 +1413,27 @@ def csu( slice_mode=slice_mode, time_range=time_range, out_file=out_file, - threshold=threshold, ignore_Feb29th=ignore_Feb29th, netcdf_version=netcdf_version, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=Threshold( + query="> 25 degree_Celsius", + ), + out_unit="day", ) def gd4( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, - threshold: float | list[float] | None = None, ignore_Feb29th: bool = False, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - logs_verbosity: Verbosity | str = Verbosity.LOW, + netcdf_version: str | NetcdfVersion = "NETCDF4", + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ GD4: Growing degree days (sum of Tmean > 4 C) @@ -1382,15 +1442,16 @@ def gd4( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -1399,31 +1460,29 @@ def gd4( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - threshold : float | list[float] | None - ``optional`` User defined threshold for certain indices. - Default depend on the index, see their individual definition. - When a list of threshold is provided, the index will be computed for each - thresholds. - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). 
- netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True the date of the event (such as when a maximum is reached) will be + stored in coordinates variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -1436,23 +1495,27 @@ def gd4( slice_mode=slice_mode, time_range=time_range, out_file=out_file, - threshold=threshold, ignore_Feb29th=ignore_Feb29th, netcdf_version=netcdf_version, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=Threshold( + query="4 degree_Celsius", + ), + out_unit="degree_Celsius day", ) def fd( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, - threshold: float | list[float] | None = None, ignore_Feb29th: bool = False, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - logs_verbosity: Verbosity | str = Verbosity.LOW, + netcdf_version: str | NetcdfVersion = "NETCDF4", + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ FD: Number of Frost Days (Tmin < 0C) @@ -1461,15 +1524,16 @@ def fd( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -1478,31 +1542,29 @@ def fd( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - threshold : float | list[float] | None - ``optional`` User defined threshold for certain indices. 
- Default depend on the index, see their individual definition. - When a list of threshold is provided, the index will be computed for each - thresholds. - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True the date of the event (such as when a maximum is reached) will be + stored in coordinates variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -1515,23 +1577,27 @@ def fd( slice_mode=slice_mode, time_range=time_range, out_file=out_file, - threshold=threshold, ignore_Feb29th=ignore_Feb29th, netcdf_version=netcdf_version, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=Threshold( + query="< 0 degree_Celsius", + ), + out_unit="day", ) def cfd( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, - threshold: float | list[float] | None = None, ignore_Feb29th: bool = False, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - logs_verbosity: Verbosity | str = Verbosity.LOW, + netcdf_version: str | NetcdfVersion = "NETCDF4", + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ CFD: Maximum number of consecutive frost days (Tmin < 0 C) @@ -1540,15 +1606,16 @@ def cfd( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -1557,31 +1624,29 @@ def cfd( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. 
Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - threshold : float | list[float] | None - ``optional`` User defined threshold for certain indices. - Default depend on the index, see their individual definition. - When a list of threshold is provided, the index will be computed for each - thresholds. - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True the date of the event (such as when a maximum is reached) will be + stored in coordinates variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -1594,23 +1659,27 @@ def cfd( slice_mode=slice_mode, time_range=time_range, out_file=out_file, - threshold=threshold, ignore_Feb29th=ignore_Feb29th, netcdf_version=netcdf_version, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=Threshold( + query="< 0 degree_Celsius", + ), + out_unit="day", ) def hd17( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, - threshold: float | list[float] | None = None, ignore_Feb29th: bool = False, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - logs_verbosity: Verbosity | str = Verbosity.LOW, + netcdf_version: str | NetcdfVersion = "NETCDF4", + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ HD17: Heating degree days (sum of Tmean < 17 C) @@ -1619,15 +1688,16 @@ def hd17( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -1636,31 +1706,29 @@ def hd17( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. 
- out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - threshold : float | list[float] | None - ``optional`` User defined threshold for certain indices. - Default depend on the index, see their individual definition. - When a list of threshold is provided, the index will be computed for each - thresholds. - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True the date of the event (such as when a maximum is reached) will be + stored in coordinates variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -1673,23 +1741,27 @@ def hd17( slice_mode=slice_mode, time_range=time_range, out_file=out_file, - threshold=threshold, ignore_Feb29th=ignore_Feb29th, netcdf_version=netcdf_version, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=Threshold( + query="17 degree_Celsius", + ), + out_unit="degree_Celsius day", ) def id( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, - threshold: float | list[float] | None = None, ignore_Feb29th: bool = False, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - logs_verbosity: Verbosity | str = Verbosity.LOW, + netcdf_version: str | NetcdfVersion = "NETCDF4", + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ ID: Number of sharp Ice Days (Tmax < 0C) @@ -1698,15 +1770,16 @@ def id( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -1715,31 +1788,29 @@ def id( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. 
- time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - threshold : float | list[float] | None - ``optional`` User defined threshold for certain indices. - Default depend on the index, see their individual definition. - When a list of threshold is provided, the index will be computed for each - thresholds. - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True the date of the event (such as when a maximum is reached) will be + stored in coordinates variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -1752,31 +1823,31 @@ def id( slice_mode=slice_mode, time_range=time_range, out_file=out_file, - threshold=threshold, ignore_Feb29th=ignore_Feb29th, netcdf_version=netcdf_version, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=Threshold( + query="< 0 degree_Celsius", + ), + out_unit="day", ) def tg10p( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, - threshold: float | list[float] | None = None, - base_period_time_range: list[datetime] | list[str] | tuple[str, str] | None = None, - window_width: int = 5, + base_period_time_range: Sequence[datetime] | Sequence[str] | None = None, only_leap_years: bool = False, ignore_Feb29th: bool = False, - interpolation: str - | QuantileInterpolation - | None = QuantileInterpolation.MEDIAN_UNBIASED, - out_unit: str | None = None, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - save_percentile: bool = False, - logs_verbosity: Verbosity | str = Verbosity.LOW, + interpolation: str | QuantileInterpolation = "median_unbiased", + netcdf_version: str | NetcdfVersion = "NETCDF4", + save_thresholds: bool = False, + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ TG10p: Days when Tmean < 10th percentile @@ -1785,15 +1856,16 @@ def tg10p( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including 
OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -1802,59 +1874,53 @@ def tg10p( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - threshold : float | list[float] | None - ``optional`` User defined threshold for certain indices. - Default depend on the index, see their individual definition. - When a list of threshold is provided, the index will be computed for each - thresholds. - base_period_time_range : list[datetime ] | list[str] | tuple[str, str] | None - ``optional`` Temporal range of the reference period on which percentiles are - computed. + base_period_time_range: list[datetime ] | list[str] | tuple[str, str] | None + ``optional`` Temporal range of the reference period. + The dates can either be given as instance of datetime.datetime or as string + values. + It is used either: + #. to compute percentiles if threshold is filled. When missing, the studied period is used to compute percentiles. The study period is either the dataset filtered by `time_range` or the whole - dataset if `time_range` is None. - On temperature based indices relying on percentiles (TX90p, WSDI...), the + dataset if `time_range` is missing. + For day of year percentiles (doy_per), on extreme percentiles the overlapping period between `base_period_time_range` and the study period is bootstrapped. - On indices not relying on percentiles, this parameter is ignored. - The dates can either be given as instance of datetime.datetime or as string - values. - For strings, many format are accepted. - window_width : int - ``optional`` User defined window width for related indices (default: 5). - Ignored for non related indices. - only_leap_years : bool + #. to compute a reference period for indices such as difference_of_mean + (a.k.a anomaly) if a single variable is given in input. + only_leap_years: bool ``optional`` Option for February 29th (default: False). - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). 
- interpolation : str | QuantileInterpolation | None + interpolation: str | QuantileInterpolation | None ``optional`` Interpolation method to compute percentile values: - ``{"linear", "hyndman_fan"}`` - Default is "hyndman_fan", a.k.a type 8 or method 8. + ``{"linear", "median_unbiased"}`` + Default is "median_unbiased", a.k.a type 8 or method 8. Ignored for non percentile based indices. - out_unit : str | None - ``optional`` Output unit for certain indices: "days" or "%" (default: "days"). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - save_percentile : bool - ``optional`` True if the percentiles should be saved within the resulting netcdf + save_thresholds: bool + ``optional`` True if the thresholds should be saved within the resulting netcdf file (default: False). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True the date of the event (such as when a maximum is reached) will be + stored in coordinates variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -1867,37 +1933,39 @@ def tg10p( slice_mode=slice_mode, time_range=time_range, out_file=out_file, - threshold=threshold, base_period_time_range=base_period_time_range, - window_width=window_width, only_leap_years=only_leap_years, ignore_Feb29th=ignore_Feb29th, interpolation=interpolation, - out_unit=out_unit, netcdf_version=netcdf_version, - save_percentile=save_percentile, + save_thresholds=save_thresholds, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=Threshold( + query="< 10 doy_per", + doy_window_width=5, + only_leap_years=only_leap_years, + interpolation=interpolation, + reference_period=base_period_time_range, + ), + out_unit="day", ) def tn10p( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, - threshold: float | list[float] | None = None, - base_period_time_range: list[datetime] | list[str] | tuple[str, str] | None = None, - window_width: int = 5, + base_period_time_range: Sequence[datetime] | Sequence[str] | None = None, only_leap_years: bool = False, ignore_Feb29th: bool = False, - interpolation: str - | QuantileInterpolation - | None = QuantileInterpolation.MEDIAN_UNBIASED, - out_unit: str | None = None, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - save_percentile: bool = False, - logs_verbosity: Verbosity | str = Verbosity.LOW, + interpolation: str | QuantileInterpolation = "median_unbiased", + netcdf_version: str | NetcdfVersion = "NETCDF4", + save_thresholds: bool = False, + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ TN10p: Days when Tmin < 10th percentile @@ -1906,15 +1974,16 @@ def tn10p( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or 
xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -1923,59 +1992,53 @@ def tn10p( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - threshold : float | list[float] | None - ``optional`` User defined threshold for certain indices. - Default depend on the index, see their individual definition. - When a list of threshold is provided, the index will be computed for each - thresholds. - base_period_time_range : list[datetime ] | list[str] | tuple[str, str] | None - ``optional`` Temporal range of the reference period on which percentiles are - computed. + base_period_time_range: list[datetime ] | list[str] | tuple[str, str] | None + ``optional`` Temporal range of the reference period. + The dates can either be given as instance of datetime.datetime or as string + values. + It is used either: + #. to compute percentiles if threshold is filled. When missing, the studied period is used to compute percentiles. The study period is either the dataset filtered by `time_range` or the whole - dataset if `time_range` is None. - On temperature based indices relying on percentiles (TX90p, WSDI...), the + dataset if `time_range` is missing. + For day of year percentiles (doy_per), on extreme percentiles the overlapping period between `base_period_time_range` and the study period is bootstrapped. - On indices not relying on percentiles, this parameter is ignored. - The dates can either be given as instance of datetime.datetime or as string - values. - For strings, many format are accepted. - window_width : int - ``optional`` User defined window width for related indices (default: 5). - Ignored for non related indices. - only_leap_years : bool + #. to compute a reference period for indices such as difference_of_mean + (a.k.a anomaly) if a single variable is given in input. + only_leap_years: bool ``optional`` Option for February 29th (default: False). - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - interpolation : str | QuantileInterpolation | None + interpolation: str | QuantileInterpolation | None ``optional`` Interpolation method to compute percentile values: - ``{"linear", "hyndman_fan"}`` - Default is "hyndman_fan", a.k.a type 8 or method 8. 
+ ``{"linear", "median_unbiased"}`` + Default is "median_unbiased", a.k.a type 8 or method 8. Ignored for non percentile based indices. - out_unit : str | None - ``optional`` Output unit for certain indices: "days" or "%" (default: "days"). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - save_percentile : bool - ``optional`` True if the percentiles should be saved within the resulting netcdf + save_thresholds: bool + ``optional`` True if the thresholds should be saved within the resulting netcdf file (default: False). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True the date of the event (such as when a maximum is reached) will be + stored in coordinates variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -1988,37 +2051,39 @@ def tn10p( slice_mode=slice_mode, time_range=time_range, out_file=out_file, - threshold=threshold, base_period_time_range=base_period_time_range, - window_width=window_width, only_leap_years=only_leap_years, ignore_Feb29th=ignore_Feb29th, interpolation=interpolation, - out_unit=out_unit, netcdf_version=netcdf_version, - save_percentile=save_percentile, + save_thresholds=save_thresholds, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=Threshold( + query="< 10 doy_per", + doy_window_width=5, + only_leap_years=only_leap_years, + interpolation=interpolation, + reference_period=base_period_time_range, + ), + out_unit="day", ) def tx10p( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, - threshold: float | list[float] | None = None, - base_period_time_range: list[datetime] | list[str] | tuple[str, str] | None = None, - window_width: int = 5, + base_period_time_range: Sequence[datetime] | Sequence[str] | None = None, only_leap_years: bool = False, ignore_Feb29th: bool = False, - interpolation: str - | QuantileInterpolation - | None = QuantileInterpolation.MEDIAN_UNBIASED, - out_unit: str | None = None, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - save_percentile: bool = False, - logs_verbosity: Verbosity | str = Verbosity.LOW, + interpolation: str | QuantileInterpolation = "median_unbiased", + netcdf_version: str | NetcdfVersion = "NETCDF4", + save_thresholds: bool = False, + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ TX10p: Days when Tmax < 10th percentile @@ -2027,15 +2092,16 @@ def tx10p( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. 
If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -2044,59 +2110,53 @@ def tx10p( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - threshold : float | list[float] | None - ``optional`` User defined threshold for certain indices. - Default depend on the index, see their individual definition. - When a list of threshold is provided, the index will be computed for each - thresholds. - base_period_time_range : list[datetime ] | list[str] | tuple[str, str] | None - ``optional`` Temporal range of the reference period on which percentiles are - computed. + base_period_time_range: list[datetime ] | list[str] | tuple[str, str] | None + ``optional`` Temporal range of the reference period. + The dates can either be given as instance of datetime.datetime or as string + values. + It is used either: + #. to compute percentiles if threshold is filled. When missing, the studied period is used to compute percentiles. The study period is either the dataset filtered by `time_range` or the whole - dataset if `time_range` is None. - On temperature based indices relying on percentiles (TX90p, WSDI...), the + dataset if `time_range` is missing. + For day of year percentiles (doy_per), on extreme percentiles the overlapping period between `base_period_time_range` and the study period is bootstrapped. - On indices not relying on percentiles, this parameter is ignored. - The dates can either be given as instance of datetime.datetime or as string - values. - For strings, many format are accepted. - window_width : int - ``optional`` User defined window width for related indices (default: 5). - Ignored for non related indices. - only_leap_years : bool + #. to compute a reference period for indices such as difference_of_mean + (a.k.a anomaly) if a single variable is given in input. + only_leap_years: bool ``optional`` Option for February 29th (default: False). - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - interpolation : str | QuantileInterpolation | None + interpolation: str | QuantileInterpolation | None ``optional`` Interpolation method to compute percentile values: - ``{"linear", "hyndman_fan"}`` - Default is "hyndman_fan", a.k.a type 8 or method 8. + ``{"linear", "median_unbiased"}`` + Default is "median_unbiased", a.k.a type 8 or method 8. Ignored for non percentile based indices. 
- out_unit : str | None - ``optional`` Output unit for certain indices: "days" or "%" (default: "days"). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - save_percentile : bool - ``optional`` True if the percentiles should be saved within the resulting netcdf + save_thresholds: bool + ``optional`` True if the thresholds should be saved within the resulting netcdf file (default: False). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True the date of the event (such as when a maximum is reached) will be + stored in coordinates variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -2109,28 +2169,35 @@ def tx10p( slice_mode=slice_mode, time_range=time_range, out_file=out_file, - threshold=threshold, base_period_time_range=base_period_time_range, - window_width=window_width, only_leap_years=only_leap_years, ignore_Feb29th=ignore_Feb29th, interpolation=interpolation, - out_unit=out_unit, netcdf_version=netcdf_version, - save_percentile=save_percentile, + save_thresholds=save_thresholds, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=Threshold( + query="< 10 doy_per", + doy_window_width=5, + only_leap_years=only_leap_years, + interpolation=interpolation, + reference_period=base_period_time_range, + ), + out_unit="day", ) def txn( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, ignore_Feb29th: bool = False, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - logs_verbosity: Verbosity | str = Verbosity.LOW, + netcdf_version: str | NetcdfVersion = "NETCDF4", + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ TXn: Minimum daily maximum temperature @@ -2139,15 +2206,16 @@ def txn( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -2156,26 +2224,29 @@ def txn( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. 
The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True the date of the event (such as when a maximum is reached) will be + stored in coordinates variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -2191,18 +2262,21 @@ def txn( ignore_Feb29th=ignore_Feb29th, netcdf_version=netcdf_version, logs_verbosity=logs_verbosity, + date_event=date_event, + out_unit="degree_Celsius", ) def tnn( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, ignore_Feb29th: bool = False, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - logs_verbosity: Verbosity | str = Verbosity.LOW, + netcdf_version: str | NetcdfVersion = "NETCDF4", + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ TNn: Minimum daily minimum temperature @@ -2211,15 +2285,16 @@ def tnn( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -2228,26 +2303,29 @@ def tnn( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. 
- out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True the date of the event (such as when a maximum is reached) will be + stored in coordinates variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -2263,26 +2341,25 @@ def tnn( ignore_Feb29th=ignore_Feb29th, netcdf_version=netcdf_version, logs_verbosity=logs_verbosity, + date_event=date_event, + out_unit="degree_Celsius", ) def csdi( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, - threshold: float | list[float] | None = None, - base_period_time_range: list[datetime] | list[str] | tuple[str, str] | None = None, - window_width: int = 5, + base_period_time_range: Sequence[datetime] | Sequence[str] | None = None, only_leap_years: bool = False, ignore_Feb29th: bool = False, - interpolation: str - | QuantileInterpolation - | None = QuantileInterpolation.MEDIAN_UNBIASED, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - save_percentile: bool = False, - logs_verbosity: Verbosity | str = Verbosity.LOW, + interpolation: str | QuantileInterpolation = "median_unbiased", + netcdf_version: str | NetcdfVersion = "NETCDF4", + save_thresholds: bool = False, + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ CSDI: Cold-spell duration index (days) @@ -2291,15 +2368,16 @@ def csdi( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -2308,57 +2386,53 @@ def csdi( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. 
- time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: Sequence[datetime | str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - threshold : float | list[float] | None - ``optional`` User defined threshold for certain indices. - Default depend on the index, see their individual definition. - When a list of threshold is provided, the index will be computed for each - thresholds. - base_period_time_range : list[datetime ] | list[str] | tuple[str, str] | None - ``optional`` Temporal range of the reference period on which percentiles are - computed. + base_period_time_range: Sequence[datetime] | Sequence[str] | None + ``optional`` Temporal range of the reference period. + The dates can either be given as instances of datetime.datetime or as string + values. + It is used either: + #. to compute percentiles when a percentile-based threshold is given. When missing, the studied period is used to compute percentiles. The study period is either the dataset filtered by `time_range` or the whole - dataset if `time_range` is None. - On temperature based indices relying on percentiles (TX90p, WSDI...), the + dataset if `time_range` is missing. + For day-of-year percentiles (doy_per) of extreme values, the overlapping period between `base_period_time_range` and the study period is bootstrapped. - On indices not relying on percentiles, this parameter is ignored. - The dates can either be given as instance of datetime.datetime or as string - values. - For strings, many format are accepted. - window_width : int - ``optional`` User defined window width for related indices (default: 5). - Ignored for non related indices. - only_leap_years : bool + #. to compute a reference period for indices such as difference_of_mean + (a.k.a. anomaly) when a single variable is given as input. + only_leap_years: bool ``optional`` Option for February 29th (default: False). - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - interpolation : str | QuantileInterpolation | None + interpolation: str | QuantileInterpolation ``optional`` Interpolation method to compute percentile values: - ``{"linear", "hyndman_fan"}`` - Default is "hyndman_fan", a.k.a type 8 or method 8. + ``{"linear", "median_unbiased"}`` + Default is "median_unbiased", a.k.a. type 8 or method 8. Ignored for non percentile based indices. - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - save_percentile : bool - ``optional`` True if the percentiles should be saved within the resulting netcdf + save_thresholds: bool + ``optional`` True if the thresholds should be saved within the resulting netcdf file (default: False). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is.
Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True the date of the event (such as when a maximum is reached) will be + stored in coordinates variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -2371,27 +2445,35 @@ def csdi( slice_mode=slice_mode, time_range=time_range, out_file=out_file, - threshold=threshold, base_period_time_range=base_period_time_range, - window_width=window_width, only_leap_years=only_leap_years, ignore_Feb29th=ignore_Feb29th, interpolation=interpolation, netcdf_version=netcdf_version, - save_percentile=save_percentile, + save_thresholds=save_thresholds, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=Threshold( + query="< 10 doy_per", + doy_window_width=5, + only_leap_years=only_leap_years, + interpolation=interpolation, + reference_period=base_period_time_range, + ), + out_unit="day", ) def cdd( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, ignore_Feb29th: bool = False, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - logs_verbosity: Verbosity | str = Verbosity.LOW, + netcdf_version: str | NetcdfVersion = "NETCDF4", + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ CDD: Maximum consecutive dry days (Precip < 1mm) @@ -2400,15 +2482,16 @@ def cdd( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -2417,26 +2500,29 @@ def cdd( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). 
- netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True the date of the event (such as when a maximum is reached) will be + stored in coordinates variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -2452,18 +2538,24 @@ def cdd( ignore_Feb29th=ignore_Feb29th, netcdf_version=netcdf_version, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=Threshold( + query="< 1 mm day-1", + ), + out_unit="day", ) def prcptot( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, ignore_Feb29th: bool = False, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - logs_verbosity: Verbosity | str = Verbosity.LOW, + netcdf_version: str | NetcdfVersion = "NETCDF4", + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ PRCPTOT: Total precipitation during Wet Days @@ -2472,15 +2564,16 @@ def prcptot( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -2489,26 +2582,29 @@ def prcptot( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). 
- netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True the date of the event (such as when a maximum is reached) will be + stored in coordinates variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -2524,18 +2620,24 @@ def prcptot( ignore_Feb29th=ignore_Feb29th, netcdf_version=netcdf_version, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=Threshold( + query=">= 1 mm day-1", + ), + out_unit="mm", ) def rr1( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, ignore_Feb29th: bool = False, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - logs_verbosity: Verbosity | str = Verbosity.LOW, + netcdf_version: str | NetcdfVersion = "NETCDF4", + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ RR1: Number of Wet Days (precip >= 1 mm) @@ -2544,15 +2646,16 @@ def rr1( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -2561,26 +2664,29 @@ def rr1( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). 
- logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True the date of the event (such as when a maximum is reached) will be + stored in coordinates variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -2596,18 +2702,24 @@ def rr1( ignore_Feb29th=ignore_Feb29th, netcdf_version=netcdf_version, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=Threshold( + query=">= 1 mm day-1", + ), + out_unit="day", ) def sdii( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, ignore_Feb29th: bool = False, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - logs_verbosity: Verbosity | str = Verbosity.LOW, + netcdf_version: str | NetcdfVersion = "NETCDF4", + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ SDII: Average precipitation during Wet Days (SDII) @@ -2616,15 +2728,16 @@ def sdii( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -2633,26 +2746,29 @@ def sdii( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. 
Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True the date of the event (such as when a maximum is reached) will be + stored in coordinates variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -2668,18 +2784,24 @@ def sdii( ignore_Feb29th=ignore_Feb29th, netcdf_version=netcdf_version, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=Threshold( + query=">= 1 mm day-1", + ), + out_unit="mm day-1", ) def cwd( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, ignore_Feb29th: bool = False, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - logs_verbosity: Verbosity | str = Verbosity.LOW, + netcdf_version: str | NetcdfVersion = "NETCDF4", + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ CWD: Maximum consecutive wet days (Precip >= 1mm) @@ -2688,15 +2810,16 @@ def cwd( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -2705,26 +2828,29 @@ def cwd( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. 
Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True the date of the event (such as when a maximum is reached) will be + stored in coordinates variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -2740,18 +2866,24 @@ def cwd( ignore_Feb29th=ignore_Feb29th, netcdf_version=netcdf_version, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=Threshold( + query=">= 1 mm day-1", + ), + out_unit="day", ) def r10mm( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, ignore_Feb29th: bool = False, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - logs_verbosity: Verbosity | str = Verbosity.LOW, + netcdf_version: str | NetcdfVersion = "NETCDF4", + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ R10mm: Number of heavy precipitation days (Precip >=10mm) @@ -2760,15 +2892,16 @@ def r10mm( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -2777,26 +2910,29 @@ def r10mm( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. 
Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True the date of the event (such as when a maximum is reached) will be + stored in coordinates variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -2812,18 +2948,24 @@ def r10mm( ignore_Feb29th=ignore_Feb29th, netcdf_version=netcdf_version, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=Threshold( + query=">= 10 mm day-1", + ), + out_unit="day", ) def r20mm( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, ignore_Feb29th: bool = False, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - logs_verbosity: Verbosity | str = Verbosity.LOW, + netcdf_version: str | NetcdfVersion = "NETCDF4", + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ R20mm: Number of very heavy precipitation days (Precip >= 20mm) @@ -2832,15 +2974,16 @@ def r20mm( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -2849,26 +2992,29 @@ def r20mm( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. 
Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True the date of the event (such as when a maximum is reached) will be + stored in coordinates variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -2884,35 +3030,42 @@ def r20mm( ignore_Feb29th=ignore_Feb29th, netcdf_version=netcdf_version, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=Threshold( + query=">= 20 mm day-1", + ), + out_unit="day", ) def rx1day( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, ignore_Feb29th: bool = False, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - logs_verbosity: Verbosity | str = Verbosity.LOW, + netcdf_version: str | NetcdfVersion = "NETCDF4", + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ - RX1day: Maximum 1-day precipitation + RX1day: maximum 1-day total precipitation Source: ECA&D, Algorithm Theoretical Basis Document (ATBD) v11. Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -2921,26 +3074,29 @@ def rx1day( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. 
Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True the date of the event (such as when a maximum is reached) will be + stored in coordinates variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -2956,35 +3112,39 @@ def rx1day( ignore_Feb29th=ignore_Feb29th, netcdf_version=netcdf_version, logs_verbosity=logs_verbosity, + date_event=date_event, + out_unit="mm day-1", ) def rx5day( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, ignore_Feb29th: bool = False, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - logs_verbosity: Verbosity | str = Verbosity.LOW, + netcdf_version: str | NetcdfVersion = "NETCDF4", + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ - RX5day: Maximum 5-day precipitation + RX5day: maximum 5-day total precipitation Source: ECA&D, Algorithm Theoretical Basis Document (ATBD) v11. Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -2993,26 +3153,29 @@ def rx5day( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. 
Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True the date of the event (such as when a maximum is reached) will be + stored in coordinates variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -3028,42 +3191,43 @@ def rx5day( ignore_Feb29th=ignore_Feb29th, netcdf_version=netcdf_version, logs_verbosity=logs_verbosity, + date_event=date_event, + out_unit="mm", ) def r75p( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, - base_period_time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + base_period_time_range: Sequence[datetime] | Sequence[str] | None = None, only_leap_years: bool = False, ignore_Feb29th: bool = False, - interpolation: str - | QuantileInterpolation - | None = QuantileInterpolation.MEDIAN_UNBIASED, - out_unit: str | None = None, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - save_percentile: bool = False, - logs_verbosity: Verbosity | str = Verbosity.LOW, + interpolation: str | QuantileInterpolation = "median_unbiased", + netcdf_version: str | NetcdfVersion = "NETCDF4", + save_thresholds: bool = False, + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ - R75p: Days with RR > 75th percentile of daily amounts (moderate wet days) (days) + R75p: Days with RR > 75th percentile of daily amounts (moderate wet days) (d) Source: ECA&D, Algorithm Theoretical Basis Document (ATBD) v11. Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -3072,51 +3236,53 @@ def r75p( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! 
- base_period_time_range : list[datetime ] | list[str] | tuple[str, str] | None - ``optional`` Temporal range of the reference period on which percentiles are - computed. + base_period_time_range: Sequence[datetime] | Sequence[str] | None + ``optional`` Temporal range of the reference period. + The dates can either be given as instances of datetime.datetime or as string + values. + It is used either: + #. to compute percentiles when a percentile-based threshold is given. When missing, the studied period is used to compute percentiles. The study period is either the dataset filtered by `time_range` or the whole - dataset if `time_range` is None. - On temperature based indices relying on percentiles (TX90p, WSDI...), the + dataset if `time_range` is missing. + For day-of-year percentiles (doy_per) of extreme values, the overlapping period between `base_period_time_range` and the study period is bootstrapped. - On indices not relying on percentiles, this parameter is ignored. - The dates can either be given as instance of datetime.datetime or as string - values. - For strings, many format are accepted. - only_leap_years : bool + #. to compute a reference period for indices such as difference_of_mean + (a.k.a. anomaly) when a single variable is given as input. + only_leap_years: bool ``optional`` Option for February 29th (default: False). - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - interpolation : str | QuantileInterpolation | None + interpolation: str | QuantileInterpolation ``optional`` Interpolation method to compute percentile values: - ``{"linear", "hyndman_fan"}`` - Default is "hyndman_fan", a.k.a type 8 or method 8. + ``{"linear", "median_unbiased"}`` + Default is "median_unbiased", a.k.a. type 8 or method 8. Ignored for non percentile based indices. - out_unit : str | None - ``optional`` Output unit for certain indices: "days" or "%" (default: "days"). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - save_percentile : bool - ``optional`` True if the percentiles should be saved within the resulting netcdf + save_thresholds: bool + ``optional`` True if the thresholds should be saved within the resulting netcdf file (default: False). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True, the date of the event (such as when a maximum is reached) will be + stored in coordinate variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated.
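For orientation before the hunk that follows (which rewires r75p onto the new ``Threshold`` object), here is a minimal usage sketch of the generated wrapper. It assumes the generated functions are re-exported at package level, as in earlier icclim releases, and the input file name is hypothetical:

import datetime as dt

import icclim

# Moderate wet days per year over 1991-2020, with the 75th percentile of
# wet-day precipitation taken from a 1961-1990 reference period.
r75p_ds = icclim.r75p(
    in_files="pr_day.nc",  # hypothetical daily precipitation file
    slice_mode="year",
    time_range=(dt.datetime(1991, 1, 1), dt.datetime(2020, 12, 31)),
    base_period_time_range=("1961-01-01", "1990-12-31"),
    save_thresholds=True,  # keep the computed percentiles in the output file
)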
@@ -3133,28 +3299,36 @@ def r75p( only_leap_years=only_leap_years, ignore_Feb29th=ignore_Feb29th, interpolation=interpolation, - out_unit=out_unit, netcdf_version=netcdf_version, - save_percentile=save_percentile, + save_thresholds=save_thresholds, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=Threshold( + query="> 75 period_per", + doy_window_width=5, + only_leap_years=only_leap_years, + interpolation=interpolation, + reference_period=base_period_time_range, + threshold_min_value="1 mm/day", + ), + out_unit="day", ) def r75ptot( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, - base_period_time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + base_period_time_range: Sequence[datetime] | Sequence[str] | None = None, only_leap_years: bool = False, ignore_Feb29th: bool = False, - interpolation: str - | QuantileInterpolation - | None = QuantileInterpolation.MEDIAN_UNBIASED, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - save_percentile: bool = False, - logs_verbosity: Verbosity | str = Verbosity.LOW, + interpolation: str | QuantileInterpolation = "median_unbiased", + netcdf_version: str | NetcdfVersion = "NETCDF4", + save_thresholds: bool = False, + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ R75pTOT: Precipitation fraction due to moderate wet days (> 75th percentile) @@ -3163,15 +3337,16 @@ def r75ptot( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -3180,49 +3355,53 @@ def r75ptot( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! 
- base_period_time_range : list[datetime ] | list[str] | tuple[str, str] | None - ``optional`` Temporal range of the reference period on which percentiles are - computed. + base_period_time_range: Sequence[datetime] | Sequence[str] | None + ``optional`` Temporal range of the reference period. + The dates can either be given as instances of datetime.datetime or as string + values. + It is used either: + #. to compute percentiles when a percentile-based threshold is given. When missing, the studied period is used to compute percentiles. The study period is either the dataset filtered by `time_range` or the whole - dataset if `time_range` is None. - On temperature based indices relying on percentiles (TX90p, WSDI...), the + dataset if `time_range` is missing. + For day-of-year percentiles (doy_per) of extreme values, the overlapping period between `base_period_time_range` and the study period is bootstrapped. - On indices not relying on percentiles, this parameter is ignored. - The dates can either be given as instance of datetime.datetime or as string - values. - For strings, many format are accepted. - only_leap_years : bool + #. to compute a reference period for indices such as difference_of_mean + (a.k.a. anomaly) when a single variable is given as input. + only_leap_years: bool ``optional`` Option for February 29th (default: False). - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - interpolation : str | QuantileInterpolation | None + interpolation: str | QuantileInterpolation ``optional`` Interpolation method to compute percentile values: - ``{"linear", "hyndman_fan"}`` - Default is "hyndman_fan", a.k.a type 8 or method 8. + ``{"linear", "median_unbiased"}`` + Default is "median_unbiased", a.k.a. type 8 or method 8. Ignored for non percentile based indices. - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - save_percentile : bool - ``optional`` True if the percentiles should be saved within the resulting netcdf + save_thresholds: bool + ``optional`` True if the thresholds should be saved within the resulting netcdf file (default: False). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True, the date of the event (such as when a maximum is reached) will be + stored in coordinate variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated.
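The hunk below differs from the r75p one only in its ``out_unit="%"``: R75pTOT reports the share of total precipitation falling on moderate wet days rather than a day count. A rough xarray sketch of that semantic, for illustration only (this is not icclim's implementation; ``pr`` and ``p75`` are assumed inputs):

import xarray as xr

def r75ptot_sketch(pr: xr.DataArray, p75: xr.DataArray) -> xr.DataArray:
    # pr: daily precipitation (mm/day) with a "time" coordinate;
    # p75: per-cell 75th percentile of wet-day precipitation over the
    # reference period.
    wet = pr.where(pr >= 1)  # keep wet days only (>= 1 mm/day)
    over = wet.where(wet > p75)  # moderate wet days
    yearly_over = over.resample(time="YS").sum()
    yearly_total = wet.resample(time="YS").sum()
    return 100 * yearly_over / yearly_total  # percent of annual wet-day total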
@@ -3240,27 +3419,35 @@ def r75ptot( ignore_Feb29th=ignore_Feb29th, interpolation=interpolation, netcdf_version=netcdf_version, - save_percentile=save_percentile, + save_thresholds=save_thresholds, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=Threshold( + query="> 75 period_per", + doy_window_width=5, + only_leap_years=only_leap_years, + interpolation=interpolation, + reference_period=base_period_time_range, + threshold_min_value="1 mm/day", + ), + out_unit="%", ) def r95p( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, - base_period_time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + base_period_time_range: Sequence[datetime] | Sequence[str] | None = None, only_leap_years: bool = False, ignore_Feb29th: bool = False, - interpolation: str - | QuantileInterpolation - | None = QuantileInterpolation.MEDIAN_UNBIASED, - out_unit: str | None = None, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - save_percentile: bool = False, - logs_verbosity: Verbosity | str = Verbosity.LOW, + interpolation: str | QuantileInterpolation = "median_unbiased", + netcdf_version: str | NetcdfVersion = "NETCDF4", + save_thresholds: bool = False, + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ R95p: Days with RR > 95th percentile of daily amounts (very wet days) (days) @@ -3269,15 +3456,16 @@ def r95p( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -3286,51 +3474,53 @@ def r95p( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! 
- base_period_time_range : list[datetime ] | list[str] | tuple[str, str] | None
- ``optional`` Temporal range of the reference period on which percentiles are
- computed.
+ base_period_time_range: list[datetime] | list[str] | tuple[str, str] | None
+ ``optional`` Temporal range of the reference period.
+ The dates can either be given as instances of datetime.datetime or as string
+ values.
+ It is used either:
+ #. to compute percentiles if a percentile-based threshold is given.
When missing, the studied period is used to compute percentiles.
The study period is either the dataset filtered by `time_range` or the whole
- dataset if `time_range` is None.
- On temperature based indices relying on percentiles (TX90p, WSDI...), the
+ dataset if `time_range` is missing.
+ For day-of-year percentiles (doy_per) of extremes, the
overlapping period between `base_period_time_range` and the study period is
bootstrapped.
- On indices not relying on percentiles, this parameter is ignored.
- The dates can either be given as instance of datetime.datetime or as string
- values.
- For strings, many format are accepted.
- only_leap_years : bool
+ #. to compute a reference period for indices such as difference_of_mean
+ (a.k.a. anomaly) if a single variable is given as input.
+ only_leap_years: bool
``optional`` Option for February 29th (default: False).
- ignore_Feb29th : bool
+ ignore_Feb29th: bool
``optional`` Ignoring or not February 29th (default: False).
- interpolation : str | QuantileInterpolation | None
+ interpolation: str | QuantileInterpolation | None
``optional`` Interpolation method to compute percentile values:
- ``{"linear", "hyndman_fan"}``
- Default is "hyndman_fan", a.k.a type 8 or method 8.
+ ``{"linear", "median_unbiased"}``
+ Default is "median_unbiased", a.k.a. type 8 or method 8.
Ignored for non percentile based indices.
- out_unit : str | None
- ``optional`` Output unit for certain indices: "days" or "%" (default: "days").
- netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion
+ netcdf_version: str | NetcdfVersion
``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC").
- save_percentile : bool
- ``optional`` True if the percentiles should be saved within the resulting netcdf
+ save_thresholds: bool
+ ``optional`` True if the thresholds should be saved within the resulting netcdf
file (default: False).
- logs_verbosity : str | Verbosity
+ logs_verbosity: str | Verbosity
``optional`` Configure how verbose icclim is.
Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW")
-
+ date_event: bool
+ When True, the date of the event (such as when a maximum is reached) will be
+ stored in coordinate variables.
+ **warning** This option may significantly slow down computation.
Notes
-----
This function has been auto-generated.
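The renamed ``save_thresholds`` flag and the new ``date_event`` flag documented above can be exercised as follows (a sketch; the input file is hypothetical):

    import icclim

    ds = icclim.r95p(
        in_files="pr_day.nc",  # hypothetical input
        base_period_time_range=("1961-01-01", "1990-12-31"),
        save_thresholds=True,  # write the computed 95th-percentile threshold to the output
        date_event=True,       # keep event dates; may slow down computation
    )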
@@ -3347,28 +3537,36 @@ def r95p( only_leap_years=only_leap_years, ignore_Feb29th=ignore_Feb29th, interpolation=interpolation, - out_unit=out_unit, netcdf_version=netcdf_version, - save_percentile=save_percentile, + save_thresholds=save_thresholds, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=Threshold( + query="> 95 period_per", + doy_window_width=5, + only_leap_years=only_leap_years, + interpolation=interpolation, + reference_period=base_period_time_range, + threshold_min_value="1 mm/day", + ), + out_unit="day", ) def r95ptot( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, - base_period_time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + base_period_time_range: Sequence[datetime] | Sequence[str] | None = None, only_leap_years: bool = False, ignore_Feb29th: bool = False, - interpolation: str - | QuantileInterpolation - | None = QuantileInterpolation.MEDIAN_UNBIASED, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - save_percentile: bool = False, - logs_verbosity: Verbosity | str = Verbosity.LOW, + interpolation: str | QuantileInterpolation = "median_unbiased", + netcdf_version: str | NetcdfVersion = "NETCDF4", + save_thresholds: bool = False, + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ R95pTOT: Precipitation fraction due to very wet days (> 95th percentile) @@ -3377,15 +3575,16 @@ def r95ptot( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -3394,49 +3593,53 @@ def r95ptot( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! 
- base_period_time_range : list[datetime ] | list[str] | tuple[str, str] | None
- ``optional`` Temporal range of the reference period on which percentiles are
- computed.
+ base_period_time_range: list[datetime] | list[str] | tuple[str, str] | None
+ ``optional`` Temporal range of the reference period.
+ The dates can either be given as instances of datetime.datetime or as string
+ values.
+ It is used either:
+ #. to compute percentiles if a percentile-based threshold is given.
When missing, the studied period is used to compute percentiles.
The study period is either the dataset filtered by `time_range` or the whole
- dataset if `time_range` is None.
- On temperature based indices relying on percentiles (TX90p, WSDI...), the
+ dataset if `time_range` is missing.
+ For day-of-year percentiles (doy_per) of extremes, the
overlapping period between `base_period_time_range` and the study period is
bootstrapped.
- On indices not relying on percentiles, this parameter is ignored.
- The dates can either be given as instance of datetime.datetime or as string
- values.
- For strings, many format are accepted.
- only_leap_years : bool
+ #. to compute a reference period for indices such as difference_of_mean
+ (a.k.a. anomaly) if a single variable is given as input.
+ only_leap_years: bool
``optional`` Option for February 29th (default: False).
- ignore_Feb29th : bool
+ ignore_Feb29th: bool
``optional`` Ignoring or not February 29th (default: False).
- interpolation : str | QuantileInterpolation | None
+ interpolation: str | QuantileInterpolation | None
``optional`` Interpolation method to compute percentile values:
- ``{"linear", "hyndman_fan"}``
- Default is "hyndman_fan", a.k.a type 8 or method 8.
+ ``{"linear", "median_unbiased"}``
+ Default is "median_unbiased", a.k.a. type 8 or method 8.
Ignored for non percentile based indices.
- netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion
+ netcdf_version: str | NetcdfVersion
``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC").
- save_percentile : bool
- ``optional`` True if the percentiles should be saved within the resulting netcdf
+ save_thresholds: bool
+ ``optional`` True if the thresholds should be saved within the resulting netcdf
file (default: False).
- logs_verbosity : str | Verbosity
+ logs_verbosity: str | Verbosity
``optional`` Configure how verbose icclim is.
Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW")
-
+ date_event: bool
+ When True, the date of the event (such as when a maximum is reached) will be
+ stored in coordinate variables.
+ **warning** This option may significantly slow down computation.
Notes
-----
This function has been auto-generated.
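As the ``r95p``/``r95ptot`` bodies in the surrounding hunks show, each wrapper now forwards a pre-built ``Threshold`` instead of a bare percentile number. A sketch of the equivalent explicit construction, mirroring the keyword arguments in this diff (the import path is an assumption; it is not shown here):

    # Assumed location of Threshold; adjust to wherever the project exposes it.
    from icclim.generic_indices.threshold import Threshold

    wet_days_95p = Threshold(
        query="> 95 period_per",  # percentile over the whole reference period
        doy_window_width=5,
        reference_period=("1961-01-01", "1990-12-31"),  # hypothetical baseline
        threshold_min_value="1 mm/day",  # only wet days (>= 1 mm/day) feed the percentile
    )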
@@ -3454,44 +3657,53 @@ def r95ptot( ignore_Feb29th=ignore_Feb29th, interpolation=interpolation, netcdf_version=netcdf_version, - save_percentile=save_percentile, + save_thresholds=save_thresholds, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=Threshold( + query="> 95 period_per", + doy_window_width=5, + only_leap_years=only_leap_years, + interpolation=interpolation, + reference_period=base_period_time_range, + threshold_min_value="1 mm/day", + ), + out_unit="%", ) def r99p( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, - base_period_time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + base_period_time_range: Sequence[datetime] | Sequence[str] | None = None, only_leap_years: bool = False, ignore_Feb29th: bool = False, - interpolation: str - | QuantileInterpolation - | None = QuantileInterpolation.MEDIAN_UNBIASED, - out_unit: str | None = None, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - save_percentile: bool = False, - logs_verbosity: Verbosity | str = Verbosity.LOW, + interpolation: str | QuantileInterpolation = "median_unbiased", + netcdf_version: str | NetcdfVersion = "NETCDF4", + save_thresholds: bool = False, + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ - R99p: Days with RR > 99th percentile of daily amounts (extremely wet days) (days) + R99p: Days with RR > 99th percentile of daily amounts (extremely wet days) Source: ECA&D, Algorithm Theoretical Basis Document (ATBD) v11. Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -3500,51 +3712,53 @@ def r99p( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! 
- base_period_time_range : list[datetime ] | list[str] | tuple[str, str] | None
- ``optional`` Temporal range of the reference period on which percentiles are
- computed.
+ base_period_time_range: list[datetime] | list[str] | tuple[str, str] | None
+ ``optional`` Temporal range of the reference period.
+ The dates can either be given as instances of datetime.datetime or as string
+ values.
+ It is used either:
+ #. to compute percentiles if a percentile-based threshold is given.
When missing, the studied period is used to compute percentiles.
The study period is either the dataset filtered by `time_range` or the whole
- dataset if `time_range` is None.
- On temperature based indices relying on percentiles (TX90p, WSDI...), the
+ dataset if `time_range` is missing.
+ For day-of-year percentiles (doy_per) of extremes, the
overlapping period between `base_period_time_range` and the study period is
bootstrapped.
- On indices not relying on percentiles, this parameter is ignored.
- The dates can either be given as instance of datetime.datetime or as string
- values.
- For strings, many format are accepted.
- only_leap_years : bool
+ #. to compute a reference period for indices such as difference_of_mean
+ (a.k.a. anomaly) if a single variable is given as input.
+ only_leap_years: bool
``optional`` Option for February 29th (default: False).
- ignore_Feb29th : bool
+ ignore_Feb29th: bool
``optional`` Ignoring or not February 29th (default: False).
- interpolation : str | QuantileInterpolation | None
+ interpolation: str | QuantileInterpolation | None
``optional`` Interpolation method to compute percentile values:
- ``{"linear", "hyndman_fan"}``
- Default is "hyndman_fan", a.k.a type 8 or method 8.
+ ``{"linear", "median_unbiased"}``
+ Default is "median_unbiased", a.k.a. type 8 or method 8.
Ignored for non percentile based indices.
- out_unit : str | None
- ``optional`` Output unit for certain indices: "days" or "%" (default: "days").
- netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion
+ netcdf_version: str | NetcdfVersion
``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC").
- save_percentile : bool
- ``optional`` True if the percentiles should be saved within the resulting netcdf
+ save_thresholds: bool
+ ``optional`` True if the thresholds should be saved within the resulting netcdf
file (default: False).
- logs_verbosity : str | Verbosity
+ logs_verbosity: str | Verbosity
``optional`` Configure how verbose icclim is.
Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW")
-
+ date_event: bool
+ When True, the date of the event (such as when a maximum is reached) will be
+ stored in coordinate variables.
+ **warning** This option may significantly slow down computation.
Notes
-----
This function has been auto-generated.
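``interpolation`` now defaults to the plain string ``"median_unbiased"`` (Hyndman & Fan type 8) rather than an enum member, and can be overridden with the other accepted string. A sketch (hypothetical input file):

    import icclim

    ds = icclim.r99p(
        in_files="pr_day.nc",
        interpolation="linear",  # instead of the default "median_unbiased"
    )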
@@ -3561,28 +3775,36 @@ def r99p( only_leap_years=only_leap_years, ignore_Feb29th=ignore_Feb29th, interpolation=interpolation, - out_unit=out_unit, netcdf_version=netcdf_version, - save_percentile=save_percentile, + save_thresholds=save_thresholds, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=Threshold( + query="> 99 period_per", + doy_window_width=5, + only_leap_years=only_leap_years, + interpolation=interpolation, + reference_period=base_period_time_range, + threshold_min_value="1 mm/day", + ), + out_unit="day", ) def r99ptot( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, - base_period_time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + base_period_time_range: Sequence[datetime] | Sequence[str] | None = None, only_leap_years: bool = False, ignore_Feb29th: bool = False, - interpolation: str - | QuantileInterpolation - | None = QuantileInterpolation.MEDIAN_UNBIASED, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - save_percentile: bool = False, - logs_verbosity: Verbosity | str = Verbosity.LOW, + interpolation: str | QuantileInterpolation = "median_unbiased", + netcdf_version: str | NetcdfVersion = "NETCDF4", + save_thresholds: bool = False, + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ R99pTOT: Precipitation fraction due to extremely wet days (> 99th percentile) @@ -3591,15 +3813,16 @@ def r99ptot( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -3608,49 +3831,53 @@ def r99ptot( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! 
- base_period_time_range : list[datetime ] | list[str] | tuple[str, str] | None
- ``optional`` Temporal range of the reference period on which percentiles are
- computed.
+ base_period_time_range: list[datetime] | list[str] | tuple[str, str] | None
+ ``optional`` Temporal range of the reference period.
+ The dates can either be given as instances of datetime.datetime or as string
+ values.
+ It is used either:
+ #. to compute percentiles if a percentile-based threshold is given.
When missing, the studied period is used to compute percentiles.
The study period is either the dataset filtered by `time_range` or the whole
- dataset if `time_range` is None.
- On temperature based indices relying on percentiles (TX90p, WSDI...), the
+ dataset if `time_range` is missing.
+ For day-of-year percentiles (doy_per) of extremes, the
overlapping period between `base_period_time_range` and the study period is
bootstrapped.
- On indices not relying on percentiles, this parameter is ignored.
- The dates can either be given as instance of datetime.datetime or as string
- values.
- For strings, many format are accepted.
- only_leap_years : bool
+ #. to compute a reference period for indices such as difference_of_mean
+ (a.k.a. anomaly) if a single variable is given as input.
+ only_leap_years: bool
``optional`` Option for February 29th (default: False).
- ignore_Feb29th : bool
+ ignore_Feb29th: bool
``optional`` Ignoring or not February 29th (default: False).
- interpolation : str | QuantileInterpolation | None
+ interpolation: str | QuantileInterpolation | None
``optional`` Interpolation method to compute percentile values:
- ``{"linear", "hyndman_fan"}``
- Default is "hyndman_fan", a.k.a type 8 or method 8.
+ ``{"linear", "median_unbiased"}``
+ Default is "median_unbiased", a.k.a. type 8 or method 8.
Ignored for non percentile based indices.
- netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion
+ netcdf_version: str | NetcdfVersion
``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC").
- save_percentile : bool
- ``optional`` True if the percentiles should be saved within the resulting netcdf
+ save_thresholds: bool
+ ``optional`` True if the thresholds should be saved within the resulting netcdf
file (default: False).
- logs_verbosity : str | Verbosity
+ logs_verbosity: str | Verbosity
``optional`` Configure how verbose icclim is.
Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW")
-
+ date_event: bool
+ When True, the date of the event (such as when a maximum is reached) will be
+ stored in coordinate variables.
+ **warning** This option may significantly slow down computation.
Notes
-----
This function has been auto-generated.
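Note the division of labour fixed by the surrounding hunks: the user-facing ``out_unit`` parameter is gone, ``r99p`` now always reports a day count, and ``r99ptot`` always a percentage. A sketch (hypothetical input):

    import icclim

    days = icclim.r99p(in_files="pr_day.nc")     # out_unit pinned to "day"
    frac = icclim.r99ptot(in_files="pr_day.nc")  # out_unit pinned to "%"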
@@ -3668,20 +3895,31 @@ def r99ptot( ignore_Feb29th=ignore_Feb29th, interpolation=interpolation, netcdf_version=netcdf_version, - save_percentile=save_percentile, + save_thresholds=save_thresholds, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=Threshold( + query="> 99 period_per", + doy_window_width=5, + only_leap_years=only_leap_years, + interpolation=interpolation, + reference_period=base_period_time_range, + threshold_min_value="1 mm/day", + ), + out_unit="%", ) def sd( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, ignore_Feb29th: bool = False, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - logs_verbosity: Verbosity | str = Verbosity.LOW, + netcdf_version: str | NetcdfVersion = "NETCDF4", + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ SD: Mean of daily snow depth @@ -3690,15 +3928,16 @@ def sd( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -3707,26 +3946,29 @@ def sd( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True the date of the event (such as when a maximum is reached) will be + stored in coordinates variables. 
+ **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. @@ -3742,18 +3984,21 @@ def sd( ignore_Feb29th=ignore_Feb29th, netcdf_version=netcdf_version, logs_verbosity=logs_verbosity, + date_event=date_event, + out_unit="cm", ) def sd1( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, ignore_Feb29th: bool = False, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - logs_verbosity: Verbosity | str = Verbosity.LOW, + netcdf_version: str | NetcdfVersion = "NETCDF4", + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ SD1: Snow days (SD >= 1 cm) @@ -3762,15 +4007,16 @@ def sd1( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -3779,26 +4025,29 @@ def sd1( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True the date of the event (such as when a maximum is reached) will be + stored in coordinates variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. 
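Per the hunks above and below, ``sd`` now pins its output unit to centimetres while ``sd1`` counts days with at least 1 cm of snow via a unit-aware threshold query. A sketch (hypothetical snow-depth file):

    import icclim

    mean_depth = icclim.sd(in_files="snd_day.nc")  # mean daily snow depth, in cm
    snow_days = icclim.sd1(in_files="snd_day.nc")  # days with snow depth >= 1 cm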
@@ -3814,18 +4063,24 @@ def sd1( ignore_Feb29th=ignore_Feb29th, netcdf_version=netcdf_version, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=Threshold( + query=">= 1 cm", + ), + out_unit="day", ) def sd5cm( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, ignore_Feb29th: bool = False, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - logs_verbosity: Verbosity | str = Verbosity.LOW, + netcdf_version: str | NetcdfVersion = "NETCDF4", + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ SD5cm: Number of days with snow depth >= 5 cm @@ -3834,15 +4089,16 @@ def sd5cm( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -3851,26 +4107,29 @@ def sd5cm( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True the date of the event (such as when a maximum is reached) will be + stored in coordinates variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. 
@@ -3886,18 +4145,24 @@ def sd5cm( ignore_Feb29th=ignore_Feb29th, netcdf_version=netcdf_version, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=Threshold( + query=">= 5 cm", + ), + out_unit="day", ) def sd50cm( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, ignore_Feb29th: bool = False, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - logs_verbosity: Verbosity | str = Verbosity.LOW, + netcdf_version: str | NetcdfVersion = "NETCDF4", + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ SD50cm: Number of days with snow depth >= 50 cm @@ -3906,15 +4171,16 @@ def sd50cm( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -3923,26 +4189,29 @@ def sd50cm( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - logs_verbosity : str | Verbosity + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - + date_event: bool + When True the date of the event (such as when a maximum is reached) will be + stored in coordinates variables. + **warning** This option may significantly slow down computation. Notes ----- This function has been auto-generated. 
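The whole fixed-threshold snow family now shares the same pattern, differing only in the query string (``">= 1 cm"``, ``">= 5 cm"``, ``">= 50 cm"``). A sketch running the three of them (hypothetical input):

    import icclim

    for index_fn in (icclim.sd1, icclim.sd5cm, icclim.sd50cm):
        index_fn(in_files="snd_day.nc", out_file=f"{index_fn.__name__}.nc")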
@@ -3958,43 +4227,46 @@ def sd50cm( ignore_Feb29th=ignore_Feb29th, netcdf_version=netcdf_version, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=Threshold( + query=">= 50 cm", + ), + out_unit="day", ) def cd( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, - base_period_time_range: list[datetime] | list[str] | tuple[str, str] | None = None, - window_width: int = 5, + base_period_time_range: Sequence[datetime] | Sequence[str] | None = None, only_leap_years: bool = False, ignore_Feb29th: bool = False, - interpolation: str - | QuantileInterpolation - | None = QuantileInterpolation.MEDIAN_UNBIASED, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - save_percentile: bool = False, - logs_verbosity: Verbosity | str = Verbosity.LOW, + interpolation: str | QuantileInterpolation = "median_unbiased", + netcdf_version: str | NetcdfVersion = "NETCDF4", + save_thresholds: bool = False, + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ - CD: Days with TG < 25th percentile of daily mean temperature - and RR <25th percentile of daily precipitation sum (cold/dry days) + CD: Days with TG < 25th percentile of daily mean temperature and RR <25th percentile of daily precipitation sum (cold/dry days) Source: ECA&D, Algorithm Theoretical Basis Document (ATBD) v11. Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -4003,52 +4275,53 @@ def cd( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. - out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - base_period_time_range : list[datetime ] | list[str] | tuple[str, str] | None - ``optional`` Temporal range of the reference period on which percentiles are - computed. 
+ base_period_time_range: list[datetime] | list[str] | tuple[str, str] | None
+ ``optional`` Temporal range of the reference period.
+ The dates can either be given as instances of datetime.datetime or as string
+ values.
+ It is used either:
+ #. to compute percentiles if a percentile-based threshold is given.
When missing, the studied period is used to compute percentiles.
The study period is either the dataset filtered by `time_range` or the whole
- dataset if `time_range` is None.
- On temperature based indices relying on percentiles (TX90p, WSDI...), the
+ dataset if `time_range` is missing.
+ For day-of-year percentiles (doy_per) of extremes, the
overlapping period between `base_period_time_range` and the study period is
bootstrapped.
- On indices not relying on percentiles, this parameter is ignored.
- The dates can either be given as instance of datetime.datetime or as string
- values.
- For strings, many format are accepted.
- window_width : int
- ``optional`` User defined window width for related indices (default: 5).
- Ignored for non related indices.
- only_leap_years : bool
+ #. to compute a reference period for indices such as difference_of_mean
+ (a.k.a. anomaly) if a single variable is given as input.
+ only_leap_years: bool
``optional`` Option for February 29th (default: False).
- ignore_Feb29th : bool
+ ignore_Feb29th: bool
``optional`` Ignoring or not February 29th (default: False).
- interpolation : str | QuantileInterpolation | None
+ interpolation: str | QuantileInterpolation | None
``optional`` Interpolation method to compute percentile values:
- ``{"linear", "hyndman_fan"}``
- Default is "hyndman_fan", a.k.a type 8 or method 8.
+ ``{"linear", "median_unbiased"}``
+ Default is "median_unbiased", a.k.a. type 8 or method 8.
Ignored for non percentile based indices.
- netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion
+ netcdf_version: str | NetcdfVersion
``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC").
- save_percentile : bool
- ``optional`` True if the percentiles should be saved within the resulting netcdf
+ save_thresholds: bool
+ ``optional`` True if the thresholds should be saved within the resulting netcdf
file (default: False).
- logs_verbosity : str | Verbosity
+ logs_verbosity: str | Verbosity
``optional`` Configure how verbose icclim is.
Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW")
-
+ date_event: bool
+ When True, the date of the event (such as when a maximum is reached) will be
+ stored in coordinate variables.
+ **warning** This option may significantly slow down computation.
Notes
-----
This function has been auto-generated.
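``cd`` compares two percentile thresholds on two variables, so both a temperature and a precipitation input are needed. A sketch using the dictionary form of ``in_files`` (the mapping and file names are hypothetical):

    import icclim

    ds = icclim.cd(
        in_files={"tas": "tas_day.nc", "pr": "pr_day.nc"},
        base_period_time_range=("1961-01-01", "1990-12-31"),
    )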
@@ -4062,50 +4335,66 @@ def cd( time_range=time_range, out_file=out_file, base_period_time_range=base_period_time_range, - window_width=window_width, only_leap_years=only_leap_years, ignore_Feb29th=ignore_Feb29th, interpolation=interpolation, netcdf_version=netcdf_version, - save_percentile=save_percentile, + save_thresholds=save_thresholds, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=[ + Threshold( + query="< 25 doy_per", + doy_window_width=5, + only_leap_years=only_leap_years, + interpolation=interpolation, + reference_period=base_period_time_range, + ), + Threshold( + query="< 25 period_per", + doy_window_width=5, + only_leap_years=only_leap_years, + interpolation=interpolation, + reference_period=base_period_time_range, + threshold_min_value="1 mm/day", + ), + ], + out_unit="day", ) def cw( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, - base_period_time_range: list[datetime] | list[str] | tuple[str, str] | None = None, - window_width: int = 5, + base_period_time_range: Sequence[datetime] | Sequence[str] | None = None, only_leap_years: bool = False, ignore_Feb29th: bool = False, - interpolation: str - | QuantileInterpolation - | None = QuantileInterpolation.MEDIAN_UNBIASED, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - save_percentile: bool = False, - logs_verbosity: Verbosity | str = Verbosity.LOW, + interpolation: str | QuantileInterpolation = "median_unbiased", + netcdf_version: str | NetcdfVersion = "NETCDF4", + save_thresholds: bool = False, + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ - CW: Days with TG < 25th percentile of daily mean temperature - and RR >75th percentile of daily precipitation sum (cold/wet days) + CW: Days with TG < 25th percentile of daily mean temperature and RR >75th percentile of daily precipitation sum (cold/wet days) Source: ECA&D, Algorithm Theoretical Basis Document (ATBD) v11. Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -4114,52 +4403,53 @@ def cw( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. 
For strings, many format are accepted.
Default is ``None``.
- out_file : str | None
+ out_file: str | None
Output NetCDF file name (default: "icclim_out.nc" in the current directory).
Default is "icclim_out.nc".
If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored.
Use the function returned value instead to retrieve the computed value.
If ``out_file`` already exists, icclim will overwrite it!
- base_period_time_range : list[datetime ] | list[str] | tuple[str, str] | None
- ``optional`` Temporal range of the reference period on which percentiles are
- computed.
+ base_period_time_range: list[datetime] | list[str] | tuple[str, str] | None
+ ``optional`` Temporal range of the reference period.
+ The dates can either be given as instances of datetime.datetime or as string
+ values.
+ It is used either:
+ #. to compute percentiles if a percentile-based threshold is given.
When missing, the studied period is used to compute percentiles.
The study period is either the dataset filtered by `time_range` or the whole
- dataset if `time_range` is None.
- On temperature based indices relying on percentiles (TX90p, WSDI...), the
+ dataset if `time_range` is missing.
+ For day-of-year percentiles (doy_per) of extremes, the
overlapping period between `base_period_time_range` and the study period is
bootstrapped.
- On indices not relying on percentiles, this parameter is ignored.
- The dates can either be given as instance of datetime.datetime or as string
- values.
- For strings, many format are accepted.
- window_width : int
- ``optional`` User defined window width for related indices (default: 5).
- Ignored for non related indices.
- only_leap_years : bool
+ #. to compute a reference period for indices such as difference_of_mean
+ (a.k.a. anomaly) if a single variable is given as input.
+ only_leap_years: bool
``optional`` Option for February 29th (default: False).
- ignore_Feb29th : bool
+ ignore_Feb29th: bool
``optional`` Ignoring or not February 29th (default: False).
- interpolation : str | QuantileInterpolation | None
+ interpolation: str | QuantileInterpolation | None
``optional`` Interpolation method to compute percentile values:
- ``{"linear", "hyndman_fan"}``
- Default is "hyndman_fan", a.k.a type 8 or method 8.
+ ``{"linear", "median_unbiased"}``
+ Default is "median_unbiased", a.k.a. type 8 or method 8.
Ignored for non percentile based indices.
- netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion
+ netcdf_version: str | NetcdfVersion
``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC").
- save_percentile : bool
- ``optional`` True if the percentiles should be saved within the resulting netcdf
+ save_thresholds: bool
+ ``optional`` True if the thresholds should be saved within the resulting netcdf
file (default: False).
- logs_verbosity : str | Verbosity
+ logs_verbosity: str | Verbosity
``optional`` Configure how verbose icclim is.
Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW")
-
+ date_event: bool
+ When True, the date of the event (such as when a maximum is reached) will be
+ stored in coordinate variables.
+ **warning** This option may significantly slow down computation.
Notes
-----
This function has been auto-generated.
@@ -4173,50 +4463,66 @@ def cw( time_range=time_range, out_file=out_file, base_period_time_range=base_period_time_range, - window_width=window_width, only_leap_years=only_leap_years, ignore_Feb29th=ignore_Feb29th, interpolation=interpolation, netcdf_version=netcdf_version, - save_percentile=save_percentile, + save_thresholds=save_thresholds, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=[ + Threshold( + query="< 25 doy_per", + doy_window_width=5, + only_leap_years=only_leap_years, + interpolation=interpolation, + reference_period=base_period_time_range, + ), + Threshold( + query="> 75 period_per", + doy_window_width=5, + only_leap_years=only_leap_years, + interpolation=interpolation, + reference_period=base_period_time_range, + threshold_min_value="1 mm/day", + ), + ], + out_unit="day", ) def wd( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, - base_period_time_range: list[datetime] | list[str] | tuple[str, str] | None = None, - window_width: int = 5, + base_period_time_range: Sequence[datetime] | Sequence[str] | None = None, only_leap_years: bool = False, ignore_Feb29th: bool = False, - interpolation: str - | QuantileInterpolation - | None = QuantileInterpolation.MEDIAN_UNBIASED, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - save_percentile: bool = False, - logs_verbosity: Verbosity | str = Verbosity.LOW, + interpolation: str | QuantileInterpolation = "median_unbiased", + netcdf_version: str | NetcdfVersion = "NETCDF4", + save_thresholds: bool = False, + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ - WD: Days with TG > 75th percentile of daily mean temperature - and RR <25th percentile of daily precipitation sum (warm/dry days) + WD: Days with TG > 75th percentile of daily mean temperature and RR <25th percentile of daily precipitation sum (warm/dry days) Source: ECA&D, Algorithm Theoretical Basis Document (ATBD) v11. Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -4225,52 +4531,53 @@ def wd( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. 
For strings, many format are accepted.
Default is ``None``.
- out_file : str | None
+ out_file: str | None
Output NetCDF file name (default: "icclim_out.nc" in the current directory).
Default is "icclim_out.nc".
If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored.
Use the function returned value instead to retrieve the computed value.
If ``out_file`` already exists, icclim will overwrite it!
- base_period_time_range : list[datetime ] | list[str] | tuple[str, str] | None
- ``optional`` Temporal range of the reference period on which percentiles are
- computed.
+ base_period_time_range: list[datetime] | list[str] | tuple[str, str] | None
+ ``optional`` Temporal range of the reference period.
+ The dates can either be given as instances of datetime.datetime or as string
+ values.
+ It is used either:
+ #. to compute percentiles if a percentile-based threshold is given.
When missing, the studied period is used to compute percentiles.
The study period is either the dataset filtered by `time_range` or the whole
- dataset if `time_range` is None.
- On temperature based indices relying on percentiles (TX90p, WSDI...), the
+ dataset if `time_range` is missing.
+ For day-of-year percentiles (doy_per) of extremes, the
overlapping period between `base_period_time_range` and the study period is
bootstrapped.
- On indices not relying on percentiles, this parameter is ignored.
- The dates can either be given as instance of datetime.datetime or as string
- values.
- For strings, many format are accepted.
- window_width : int
- ``optional`` User defined window width for related indices (default: 5).
- Ignored for non related indices.
- only_leap_years : bool
+ #. to compute a reference period for indices such as difference_of_mean
+ (a.k.a. anomaly) if a single variable is given as input.
+ only_leap_years: bool
``optional`` Option for February 29th (default: False).
- ignore_Feb29th : bool
+ ignore_Feb29th: bool
``optional`` Ignoring or not February 29th (default: False).
- interpolation : str | QuantileInterpolation | None
+ interpolation: str | QuantileInterpolation | None
``optional`` Interpolation method to compute percentile values:
- ``{"linear", "hyndman_fan"}``
- Default is "hyndman_fan", a.k.a type 8 or method 8.
+ ``{"linear", "median_unbiased"}``
+ Default is "median_unbiased", a.k.a. type 8 or method 8.
Ignored for non percentile based indices.
- netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion
+ netcdf_version: str | NetcdfVersion
``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC").
- save_percentile : bool
- ``optional`` True if the percentiles should be saved within the resulting netcdf
+ save_thresholds: bool
+ ``optional`` True if the thresholds should be saved within the resulting netcdf
file (default: False).
- logs_verbosity : str | Verbosity
+ logs_verbosity: str | Verbosity
``optional`` Configure how verbose icclim is.
Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW")
-
+ date_event: bool
+ When True, the date of the event (such as when a maximum is reached) will be
+ stored in coordinate variables.
+ **warning** This option may significantly slow down computation.
Notes
-----
This function has been auto-generated.
@@ -4284,50 +4591,66 @@ def wd( time_range=time_range, out_file=out_file, base_period_time_range=base_period_time_range, - window_width=window_width, only_leap_years=only_leap_years, ignore_Feb29th=ignore_Feb29th, interpolation=interpolation, netcdf_version=netcdf_version, - save_percentile=save_percentile, + save_thresholds=save_thresholds, logs_verbosity=logs_verbosity, + date_event=date_event, + threshold=[ + Threshold( + query="> 75 doy_per", + doy_window_width=5, + only_leap_years=only_leap_years, + interpolation=interpolation, + reference_period=base_period_time_range, + ), + Threshold( + query="< 25 period_per", + doy_window_width=5, + only_leap_years=only_leap_years, + interpolation=interpolation, + reference_period=base_period_time_range, + threshold_min_value="1 mm/day", + ), + ], + out_unit="day", ) def ww( - in_files: InFileType, - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + in_files: InFileLike, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, - base_period_time_range: list[datetime] | list[str] | tuple[str, str] | None = None, - window_width: int = 5, + base_period_time_range: Sequence[datetime] | Sequence[str] | None = None, only_leap_years: bool = False, ignore_Feb29th: bool = False, - interpolation: str - | QuantileInterpolation - | None = QuantileInterpolation.MEDIAN_UNBIASED, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - save_percentile: bool = False, - logs_verbosity: Verbosity | str = Verbosity.LOW, + interpolation: str | QuantileInterpolation = "median_unbiased", + netcdf_version: str | NetcdfVersion = "NETCDF4", + save_thresholds: bool = False, + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, ) -> Dataset: """ - WW: Days with TG > 75th percentile of daily mean temperature - and RR >75th percentile of daily precipitation sum (warm/wet days) + WW: Days with TG > 75th percentile of daily mean temperature and RR >75th percentile of daily precipitation sum (warm/wet days) Source: ECA&D, Algorithm Theoretical Basis Document (ATBD) v11. Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -4336,52 +4659,53 @@ def ww( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. 
         For strings, many format are accepted.
         Default is ``None``.
-    out_file : str | None
+    out_file: str | None
         Output NetCDF file name (default: "icclim_out.nc" in the current directory).
         Default is "icclim_out.nc".
         If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored.
         Use the function returned value instead to retrieve the computed value.
         If ``out_file`` already exists, icclim will overwrite it!
-    base_period_time_range : list[datetime ] | list[str] | tuple[str, str] | None
-        ``optional`` Temporal range of the reference period on which percentiles are
-        computed.
+    base_period_time_range: list[datetime] | list[str] | tuple[str, str] | None
+        ``optional`` Temporal range of the reference period.
+        The dates can either be given as instances of datetime.datetime or as string
+        values.
+        It is used either:
+        #. to compute percentiles, when a percentile-based threshold is used.
         When missing, the studied period is used to compute percentiles.
         The study period is either the dataset filtered by `time_range` or the whole
-        dataset if `time_range` is None.
-        On temperature based indices relying on percentiles (TX90p, WSDI...), the
+        dataset if `time_range` is missing.
+        For day of year percentiles (doy_per) on extreme percentiles, the
        overlapping period between `base_period_time_range` and the study period
         is bootstrapped.
-        On indices not relying on percentiles, this parameter is ignored.
-        The dates can either be given as instance of datetime.datetime or as string
-        values.
-        For strings, many format are accepted.
-    window_width : int
-        ``optional`` User defined window width for related indices (default: 5).
-        Ignored for non related indices.
-    only_leap_years : bool
+        #. to compute a reference period for indices such as difference_of_mean
+        (a.k.a. anomaly) if a single variable is given in input.
+    only_leap_years: bool
         ``optional`` Option for February 29th (default: False).
-    ignore_Feb29th : bool
+    ignore_Feb29th: bool
         ``optional`` Ignoring or not February 29th (default: False).
-    interpolation : str | QuantileInterpolation | None
+    interpolation: str | QuantileInterpolation | None
         ``optional`` Interpolation method to compute percentile values:
-        ``{"linear", "hyndman_fan"}``
-        Default is "hyndman_fan", a.k.a type 8 or method 8.
+        ``{"linear", "median_unbiased"}``
+        Default is "median_unbiased", a.k.a. type 8 or method 8.
         Ignored for non percentile based indices.
-    netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion
+    netcdf_version: str | NetcdfVersion
         ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC").
-    save_percentile : bool
-        ``optional`` True if the percentiles should be saved within the resulting netcdf
+    save_thresholds: bool
+        ``optional`` True if the thresholds should be saved within the resulting netcdf
         file (default: False).
-    logs_verbosity : str | Verbosity
+    logs_verbosity: str | Verbosity
         ``optional`` Configure how verbose icclim is.
         Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW")
-
+    date_event: bool
+        When True the date of the event (such as when a maximum is reached) will be
+        stored in coordinate variables.
+        **warning** This option may significantly slow down computation.

     Notes
     -----
     This function has been auto-generated.
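The ``ww`` body below delegates its percentile logic to a pair of ``Threshold`` objects, using the same query mini-language as ``wd`` above. The construction pattern in isolation, assuming an import path (the diff only shows the call sites) and a hypothetical reference period::

    # Assumed import path; not shown by the diff.
    from icclim.generic_indices.threshold import Threshold

    reference = ("1961-01-01", "1990-12-31")  # hypothetical baseline
    tg_per = Threshold(
        query="> 75 doy_per",   # 75th day-of-year percentile of daily mean temperature
        doy_window_width=5,     # 5-day window centred on each day of year
        reference_period=reference,
    )
    rr_per = Threshold(
        query="> 75 period_per",         # 75th percentile over the whole period
        threshold_min_value="1 mm/day",  # only wet days feed the percentile
        reference_period=reference,
    )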
@@ -4395,30 +4719,54 @@ def ww(
         time_range=time_range,
         out_file=out_file,
         base_period_time_range=base_period_time_range,
-        window_width=window_width,
         only_leap_years=only_leap_years,
         ignore_Feb29th=ignore_Feb29th,
         interpolation=interpolation,
         netcdf_version=netcdf_version,
-        save_percentile=save_percentile,
+        save_thresholds=save_thresholds,
         logs_verbosity=logs_verbosity,
+        date_event=date_event,
+        threshold=[
+            Threshold(
+                query="> 75 doy_per",
+                doy_window_width=5,
+                only_leap_years=only_leap_years,
+                interpolation=interpolation,
+                reference_period=base_period_time_range,
+            ),
+            Threshold(
+                query="> 75 period_per",
+                doy_window_width=5,
+                only_leap_years=only_leap_years,
+                interpolation=interpolation,
+                reference_period=base_period_time_range,
+                threshold_min_value="1 mm/day",
+            ),
+        ],
+        out_unit="day",
     )


 def custom_index(
     user_index: UserIndexDict,
-    in_files: InFileType,
-    var_name: str | list[str] | None = None,
-    slice_mode: SliceMode = Frequency.YEAR,
-    time_range: list[datetime] | list[str] | tuple[str, str] | None = None,
+    in_files: InFileLike,
+    var_name: str | Sequence[str] | None = None,
+    slice_mode: FrequencyLike | Frequency = "year",
+    time_range: Sequence[datetime | str] | None = None,
     out_file: str | None = None,
-    base_period_time_range: list[datetime] | list[str] | tuple[str, str] | None = None,
+    base_period_time_range: Sequence[datetime] | Sequence[str] | None = None,
+    doy_window_width: int = 5,
     only_leap_years: bool = False,
     ignore_Feb29th: bool = False,
+    interpolation: str | QuantileInterpolation = "median_unbiased",
     out_unit: str | None = None,
-    netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4,
-    save_percentile: bool = False,
-    logs_verbosity: Verbosity | str = Verbosity.LOW,
+    netcdf_version: str | NetcdfVersion = "NETCDF4",
+    save_thresholds: bool = False,
+    logs_verbosity: Verbosity | str = "LOW",
+    date_event: bool = False,
+    min_spell_length: int | None = 6,
+    rolling_window_width: int | None = 5,
+    sampling_method: SamplingMethodLike = "resample",
 ) -> Dataset:
     """
     This function can be used to create indices using simple operators.
@@ -4427,15 +4775,16 @@ def custom_index(

     Parameters
     ----------
-    in_files : str | list[str] | Dataset | DataArray | InputDictionary,
+
+    in_files: str | list[str] | Dataset | DataArray | InputDictionary,
         Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs,
         or path to zarr store, or xarray.Dataset or xarray.DataArray.
-    var_name : str | list[str] | None
+    var_name: str | list[str] | None
         ``optional`` Target variable name to process corresponding to ``in_files``.
         If None (default) on ECA&D index, the variable is guessed based on the climate
         index wanted. Mandatory for a user index.
-    slice_mode : SliceMode
+    slice_mode: SliceMode
         Type of temporal aggregation:
         The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON",
         "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}``
@@ -4444,46 +4793,72 @@ def custom_index(
         ``("season", ("19 july", "14 august"))``.
         Default is "year".
         See :ref:`slice_mode` for details.
-    time_range : list[datetime ] | list[str] | tuple[str, str] | None
+    time_range: list[datetime] | list[str] | tuple[str, str] | None
         ``optional`` Temporal range: upper and lower bounds for temporal subsetting.
         If ``None``, whole period of input files will be processed.
         The dates can either be given as instance of datetime.datetime or as string
         values.
         For strings, many format are accepted.
         Default is ``None``.
-    out_file : str | None
+    out_file: str | None
         Output NetCDF file name (default: "icclim_out.nc" in the current directory).
         Default is "icclim_out.nc".
         If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored.
         Use the function returned value instead to retrieve the computed value.
         If ``out_file`` already exists, icclim will overwrite it!
-    base_period_time_range : list[datetime ] | list[str] | tuple[str, str] | None
-        ``optional`` Temporal range of the reference period on which percentiles are
-        computed.
+    base_period_time_range: list[datetime] | list[str] | tuple[str, str] | None
+        ``optional`` Temporal range of the reference period.
+        The dates can either be given as instances of datetime.datetime or as string
+        values.
+        It is used either:
+        #. to compute percentiles, when a percentile-based threshold is used.
         When missing, the studied period is used to compute percentiles.
         The study period is either the dataset filtered by `time_range` or the whole
-        dataset if `time_range` is None.
-        On temperature based indices relying on percentiles (TX90p, WSDI...), the
+        dataset if `time_range` is missing.
+        For day of year percentiles (doy_per) on extreme percentiles, the
         overlapping period between `base_period_time_range` and the study period
         is bootstrapped.
-        On indices not relying on percentiles, this parameter is ignored.
-        The dates can either be given as instance of datetime.datetime or as string
-        values.
-        For strings, many format are accepted.
-    only_leap_years : bool
+        #. to compute a reference period for indices such as difference_of_mean
+        (a.k.a. anomaly) if a single variable is given in input.
+    doy_window_width: int
+        ``optional`` Window width used to aggregate day of year values when computing
+        day of year percentiles (doy_per).
+        Default: 5 (5 days).
+    only_leap_years: bool
         ``optional`` Option for February 29th (default: False).
-    ignore_Feb29th : bool
+    ignore_Feb29th: bool
         ``optional`` Ignoring or not February 29th (default: False).
-    out_unit : str | None
+    interpolation: str | QuantileInterpolation | None
+        ``optional`` Interpolation method to compute percentile values:
+        ``{"linear", "median_unbiased"}``
+        Default is "median_unbiased", a.k.a. type 8 or method 8.
+        Ignored for non percentile based indices.
+    out_unit: str | None
         ``optional`` Output unit for certain indices: "days" or "%" (default: "days").
-    netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion
+    netcdf_version: str | NetcdfVersion
         ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC").
-    save_percentile : bool
-        ``optional`` True if the percentiles should be saved within the resulting netcdf
+    save_thresholds: bool
+        ``optional`` True if the thresholds should be saved within the resulting netcdf
         file (default: False).
-    logs_verbosity : str | Verbosity
+    logs_verbosity: str | Verbosity
         ``optional`` Configure how verbose icclim is.
         Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW")
-
+    date_event: bool
+        When True the date of the event (such as when a maximum is reached) will be
+        stored in coordinate variables.
+        **warning** This option may significantly slow down computation.
+    min_spell_length: int
+        ``optional`` Minimum spell duration to be taken into account when computing the
+        sum_of_spell_lengths.
+    rolling_window_width: int
+        ``optional`` Window width of the rolling window for indicators such as
+        `{max_of_rolling_sum, max_of_rolling_average, min_of_rolling_sum, min_of_rolling_average}`  # noqa
+    sampling_method: str
+        Choose whether the output sampling configured in `slice_mode` is a
+        `groupby` operation or a `resample` operation (as per xarray definition).
+        Possible values: ``{"groupby", "resample", "groupby_ref_and_resample_study"}``
+        (default: "resample")
+        `groupby_ref_and_resample_study` may only be used when computing the
+        `difference_of_means` (a.k.a. the anomaly).

     Notes
     -----
     This function has been auto-generated.
@@ -4496,10 +4871,16 @@ def custom_index(
         time_range=time_range,
         out_file=out_file,
         base_period_time_range=base_period_time_range,
+        doy_window_width=doy_window_width,
         only_leap_years=only_leap_years,
         ignore_Feb29th=ignore_Feb29th,
+        interpolation=interpolation,
         out_unit=out_unit,
         netcdf_version=netcdf_version,
-        save_percentile=save_percentile,
+        save_thresholds=save_thresholds,
         logs_verbosity=logs_verbosity,
+        date_event=date_event,
+        min_spell_length=min_spell_length,
+        rolling_window_width=rolling_window_width,
+        sampling_method=sampling_method,
     )
diff --git a/icclim/clix_meta/clix_meta_indices.py b/icclim/clix_meta/clix_meta_indices.py
deleted file mode 100644
index ab576856..00000000
--- a/icclim/clix_meta/clix_meta_indices.py
+++ /dev/null
@@ -1,68 +0,0 @@
-from __future__ import annotations
-
-""" Wrapper for clix-meta yaml file.
-    This read the yaml and make its content accessible an instance of ClixMetaIndices.
-    It also exposes some type hints of yaml content.
-"""
-import os
-from pathlib import Path
-from typing import Any, TypedDict
-
-import yaml
-
-CLIX_YAML_PATH = (
-    Path(os.path.dirname(os.path.abspath(__file__))) / "index_definitions.yml"
-)
-
-
-class EtMetadata(TypedDict):
-    short_name: str
-    long_name: str
-    definition: str
-    comment: str
-
-
-class OutputMetadata(TypedDict):
-    var_name: str
-    standard_name: str
-    long_name: str
-    units: str
-    cell_methods: dict
-
-
-class ClixMetaIndex(TypedDict):
-    reference: str
-    period: dict
-    output: OutputMetadata
-    input: dict
-    index_function: dict
-    ET: EtMetadata
-
-
-class ClixMetaIndices:
-    """
-    Singleton to access content of clix-meta yaml file.
-    """
-
-    __instance: Any = None
-    indices_record: dict[str, ClixMetaIndex]
-
-    @staticmethod
-    def get_instance():
-        if ClixMetaIndices.__instance is None:
-            ClixMetaIndices.__instance = ClixMetaIndices()
-        return ClixMetaIndices.__instance
-
-    def __init__(self):
-        if ClixMetaIndices.__instance is not None:
-            raise Exception("This class is a singleton! Use Clix::get_instance.")
-        else:
-            ClixMetaIndices.__instance = self
-        with open(CLIX_YAML_PATH, "r") as clix_meta_file:
-            self.indices_record = yaml.safe_load(clix_meta_file)["indices"]
-
-    def lookup(self, query: str) -> ClixMetaIndex | None:
-        for index in self.indices_record.keys():
-            if index.upper() == query.upper():
-                return self.indices_record[index]
-        return None
diff --git a/icclim/clix_meta/index_definitions.yml b/icclim/clix_meta/index_definitions.yml
deleted file mode 100644
index 643942d4..00000000
--- a/icclim/clix_meta/index_definitions.yml
+++ /dev/null
@@ -1,4043 +0,0 @@
-# These index definitions are auto-generated from the master table at
-# https://github.com/clix-meta/clix-meta
-
-# This is based on version 0.3.0.
---- -indices: - fd: - reference: ECAD - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "fd" - standard_name: number_of_days_with_air_temperature_below_threshold - proposed_standard_name: number_of_occurrences_with_air_temperature_below_threshold - long_name: "Number of Frost Days (Tmin < 0C)" - units: "1" - cell_methods: - - time: minimum within days - - time: sum over days - input: - data: tasmin - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: 0 - units: "degree_Celsius" - condition: - kind: operator - operator: "<" - ET: - short_name: "fd" - long_name: "Frost days" - definition: "Count when TN < 0ºC" - comment: "count of days when daily minimum temperature is below 0 degC" - - tnlt2: - reference: ET-SCI - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "tnlt2" - standard_name: number_of_days_with_air_temperature_below_threshold - proposed_standard_name: number_of_occurrences_with_air_temperature_below_threshold - long_name: "Number of weak Frost Days (Tmin < +2C)" - units: "1" - cell_methods: - - time: minimum within days - - time: sum over days - input: - data: tasmin - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: 2 - units: "degree_Celsius" - condition: - kind: operator - operator: "<" - ET: - short_name: "fd2" - long_name: "Frost days 2" - definition: "Count when TN < 2ºC" - comment: "count of days when daily minimum temperature is below plus 2 degC" - - tnltm2: - reference: ET-SCI - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "tnltm2" - standard_name: number_of_days_with_air_temperature_below_threshold - proposed_standard_name: number_of_occurrences_with_air_temperature_below_threshold - long_name: "Number of sharp Frost Days (Tmin < -2C)" - units: "1" - cell_methods: - - time: minimum within days - - time: sum over days - input: - data: tasmin - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: -2 - units: "degree_Celsius" - condition: - kind: operator - operator: "<" - ET: - short_name: "fdm2" - long_name: "Hard freeze" - definition: "Count when TN < -2ºC" - comment: "count of days when daily minimum temperature is below minus 2 degC" - - tnltm20: - reference: ET-SCI - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "tnltm20" - standard_name: number_of_days_with_air_temperature_below_threshold - proposed_standard_name: number_of_occurrences_with_air_temperature_below_threshold - long_name: - units: "1" - cell_methods: - - time: minimum within days - - time: sum over days - input: - data: tasmin - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: -20 - units: "degree_Celsius" - condition: - kind: operator - operator: "<" - ET: - short_name: "fdm20" - long_name: "Very hard freeze" - definition: "Count when TN < -20ºC" - comment: "count of days when daily minimum temperature is below minus 20 degC" - - id: - reference: ECAD - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "id" - standard_name: number_of_days_with_air_temperature_below_threshold - proposed_standard_name: number_of_occurrences_with_air_temperature_below_threshold - long_name: "Number of 
sharp Ice Days (Tmax < 0C)" - units: "1" - cell_methods: - - time: maximum within days - - time: sum over days - input: - data: tasmax - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: 0 - units: "degree_Celsius" - condition: - kind: operator - operator: "<" - ET: - short_name: "id" - long_name: "Ice days" - definition: "Count when TX < 0ºC" - comment: "count of days when daily maximum temperature is below 0 degC" - - su: - reference: ECAD - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "su" - standard_name: number_of_days_with_air_temperature_above_threshold - proposed_standard_name: number_of_occurrences_with_air_temperature_above_threshold - long_name: "Number of Summer Days (Tmax > 25C)" - units: "1" - cell_methods: - - time: maximum within days - - time: sum over days - input: - data: tasmax - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: 25 - units: "degree_Celsius" - condition: - kind: operator - operator: ">" - ET: - short_name: "su" - long_name: "Summer days" - definition: "Count when TX > 25ºC" - comment: "count of days when daily maximum temperature is above plus 25 degC" - - txge30: - reference: ET-SCI - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "txge30" - standard_name: number_of_days_with_air_temperature_above_threshold - proposed_standard_name: number_of_occurrences_with_air_temperature_at_or_above_threshold - long_name: "Number of Hot Days (Tmax >= 35C)" - units: "1" - cell_methods: - - time: maximum within days - - time: sum over days - input: - data: tasmax - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: 30 - units: "degree_Celsius" - condition: - kind: operator - operator: ">" - ET: - short_name: "su30" - long_name: "Hot days" - definition: "Count when TX >= 30ºC" - comment: "count of days when daily maximum temperature is above or equal plus 30 degC" - - txge35: - reference: ET-SCI - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "txge35" - standard_name: number_of_days_with_air_temperature_above_threshold - proposed_standard_name: number_of_occurrences_with_air_temperature_at_or_above_threshold - long_name: "Number of Very Hot Days (Tmax >= 35C)" - units: "1" - cell_methods: - - time: maximum within days - - time: sum over days - input: - data: tasmax - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: 35 - units: "degree_Celsius" - condition: - kind: operator - operator: ">" - ET: - short_name: "su35" - long_name: "Very hot days" - definition: "Count when TX >= 35ºC" - comment: "count of days when daily maximum temperature is above or equal plus 35 degC" - - tr: - reference: ECAD - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "tr" - standard_name: number_of_days_with_air_temperature_above_threshold - proposed_standard_name: number_of_occurrences_with_air_temperature_above_threshold - long_name: "Number of Tropical Nights (Tmin > 20C)" - units: "1" - cell_methods: - - time: minimum within days - - time: sum over days - input: - data: tasmin - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: 20 - 
units: "degree_Celsius" - condition: - kind: operator - operator: ">" - ET: - short_name: "tr" - long_name: "Tropical nights" - definition: "Count when TN > 20ºC" - comment: "count of days when daily minimum temperature is above plus 20 degC" - - tmge5: - reference: ET-SCI - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "tmge5" - standard_name: number_of_days_with_air_temperature_above_threshold - proposed_standard_name: number_of_occurrences_with_air_temperature_at_or_above_threshold - long_name: "Number of days with Tmean >= 5C" - units: "1" - cell_methods: - - time: mean within days - - time: sum over days - input: - data: tas - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: 5 - units: "degree_Celsius" - condition: - kind: operator - operator: ">" - ET: - short_name: "tm5a" - long_name: "TM above 5C" - definition: "Count when TM >= 5ºC" - comment: "count of days when daily mean temperature is above plus 5 degC" - - tmlt5: - reference: ET-SCI - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "tmlt5" - standard_name: number_of_days_with_air_temperature_below_threshold - proposed_standard_name: number_of_occurrences_with_air_temperature_below_threshold - long_name: "Number of days with Tmean < 5C" - units: "1" - cell_methods: - - time: mean within days - - time: sum over days - input: - data: tas - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: 5 - units: "degree_Celsius" - condition: - kind: operator - operator: "<" - ET: - short_name: "tm5b" - long_name: "TM below 5C" - definition: "Count when TM < 5ºC" - comment: "count of days when daily mean temperature is below plus 5 degC" - - tmge10: - reference: ET-SCI - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "tmge10" - standard_name: number_of_days_with_air_temperature_above_threshold - proposed_standard_name: number_of_occurrences_with_air_temperature_at_or_above_threshold - long_name: "Number of days with Tmean >= 10C" - units: "1" - cell_methods: - - time: mean within days - - time: sum over days - input: - data: tas - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: 10 - units: "degree_Celsius" - condition: - kind: operator - operator: ">" - ET: - short_name: "tm10a" - long_name: "TM above 10C" - definition: "Count when TM >= 10ºC" - comment: "count of days when daily mean temperature is above plus 10 degC" - - tmlt10: - reference: ET-SCI - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "tmlt10" - standard_name: number_of_days_with_air_temperature_below_threshold - proposed_standard_name: number_of_occurrences_with_air_temperature_below_threshold - long_name: "Number of days with Tmean < 10C" - units: "1" - cell_methods: - - time: mean within days - - time: sum over days - input: - data: tas - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: 10 - units: "degree_Celsius" - condition: - kind: operator - operator: "<" - ET: - short_name: "tm10b" - long_name: "TM below 10C" - definition: "Count when TM < 10ºC" - comment: "count of days when daily mean temperature is below plus 10 degC" - - tngt{TT}: - reference: CLIPC - period: - allowed: - annual: - 
seasonal: - monthly: - default: annual - output: - var_name: "tngt{TT}" - standard_name: number_of_days_with_air_temperature_above_threshold - proposed_standard_name: number_of_occurrences_with_air_temperature_above_threshold - long_name: "Number of days with Tmin > {TT}C" - units: "1" - cell_methods: - - time: minimum within days - - time: sum over days - input: - data: tasmin - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: {TT} - units: "degree_Celsius" - condition: - kind: operator - operator: ">" - ET: - short_name: - long_name: - definition: - comment: "count of days when daily minimum temperature is above {TT} degC" - - tnlt{TT}: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "tnlt{TT}" - standard_name: number_of_days_with_air_temperature_below_threshold - proposed_standard_name: number_of_occurrences_with_air_temperature_below_threshold - long_name: "Number of days with Tmin < {TT}C" - units: "1" - cell_methods: - - time: minimum within days - - time: sum over days - input: - data: tasmin - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: {TT} - units: "degree_Celsius" - condition: - kind: operator - operator: "<" - ET: - short_name: - long_name: - definition: - comment: "count of days when daily minimum temperature is below {TT} degC" - - tnge{TT}: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "tnge{TT}" - standard_name: number_of_days_with_air_temperature_above_threshold - proposed_standard_name: number_of_occurrences_with_air_temperature_at_or_above_threshold - long_name: "Number of days with Tmin >= {TT}C" - units: "1" - cell_methods: - - time: minimum within days - - time: sum over days - input: - data: tasmin - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: {TT} - units: "degree_Celsius" - condition: - kind: operator - operator: ">" - ET: - short_name: - long_name: - definition: - comment: "count of days when daily minimum temperature is above or equal {TT} degC" - - tnle{TT}: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "tnle{TT}" - standard_name: number_of_days_with_air_temperature_below_threshold - proposed_standard_name: number_of_occurrences_with_air_temperature_at_or_below_threshold - long_name: "Number of days with Tmin <= {TT}C" - units: "1" - cell_methods: - - time: minimum within days - - time: sum over days - input: - data: tasmin - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: {TT} - units: "degree_Celsius" - condition: - kind: operator - operator: "<" - ET: - short_name: - long_name: - definition: - comment: "count of days when daily minimum temperature is below or equal{TT} degC" - - txgt{TT}: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "txgt{TT}" - standard_name: number_of_days_with_air_temperature_above_threshold - proposed_standard_name: number_of_occurrences_with_air_temperature_above_threshold - long_name: "Number of days with Tmax > {TT}C" - units: "1" - cell_methods: - - time: maximum within days - - time: sum over days - input: - data: tasmax - index_function: - name: 
count_occurrences - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: {TT} - units: "degree_Celsius" - condition: - kind: operator - operator: ">" - ET: - short_name: - long_name: - definition: - comment: "count of days when daily maximum temperature is above {TT} degC" - - txlt{TT}: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "txlt{TT}" - standard_name: number_of_days_with_air_temperature_below_threshold - proposed_standard_name: number_of_occurrences_with_air_temperature_below_threshold - long_name: "Number of days with Tmax < {TT}C" - units: "1" - cell_methods: - - time: maximum within days - - time: sum over days - input: - data: tasmax - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: {TT} - units: "degree_Celsius" - condition: - kind: operator - operator: "<" - ET: - short_name: - long_name: - definition: - comment: "count of days when daily maximum temperature is below {TT} degC" - - txge{TT}: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "txge{TT}" - standard_name: number_of_days_with_air_temperature_above_threshold - proposed_standard_name: number_of_occurrences_with_air_temperature_at_or_above_threshold - long_name: "Number of days with Tmax >= {TT}C" - units: "1" - cell_methods: - - time: maximum within days - - time: sum over days - input: - data: tasmax - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: {TT} - units: "degree_Celsius" - condition: - kind: operator - operator: ">" - ET: - short_name: - long_name: - definition: - comment: "count of days when daily maximum temperature is above or equal {TT} degC" - - txle{TT}: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "txle{TT}" - standard_name: number_of_days_with_air_temperature_below_threshold - proposed_standard_name: number_of_occurrences_with_air_temperature_at_or_below_threshold - long_name: "Number of days with Tmax <= {TT}C" - units: "1" - cell_methods: - - time: maximum within days - - time: sum over days - input: - data: tasmax - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: {TT} - units: "degree_Celsius" - condition: - kind: operator - operator: "<" - ET: - short_name: - long_name: - definition: - comment: "count of days when daily maximum temperature is below or equal {TT} degC" - - tmgt{TT}: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "tmgt{TT}" - standard_name: number_of_days_with_air_temperature_above_threshold - proposed_standard_name: number_of_occurrences_with_air_temperature_above_threshold - long_name: "Number of days with Tmean > {TT}C" - units: "1" - cell_methods: - - time: mean within days - - time: sum over days - input: - data: tas - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: {TT} - units: "degree_Celsius" - condition: - kind: operator - operator: ">" - ET: - short_name: - long_name: - definition: - comment: "count of days when daily mean temperature is above {TT} degC" - - tmlt{TT}: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - 
var_name: "tmlt{TT}" - standard_name: number_of_days_with_air_temperature_below_threshold - proposed_standard_name: number_of_occurrences_with_air_temperature_below_threshold - long_name: "Number of days with Tmean < {TT}C" - units: "1" - cell_methods: - - time: mean within days - - time: sum over days - input: - data: tas - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: {TT} - units: "degree_Celsius" - condition: - kind: operator - operator: "<" - ET: - short_name: - long_name: - definition: - comment: "count of days when daily mean temperature is below {TT} degC" - - tmge{TT}: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "tmge{TT}" - standard_name: number_of_days_with_air_temperature_above_threshold - proposed_standard_name: number_of_occurrences_with_air_temperature_at_or_above_threshold - long_name: "Number of days with Tmean >= {TT}C" - units: "1" - cell_methods: - - time: mean within days - - time: sum over days - input: - data: tas - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: {TT} - units: "degree_Celsius" - condition: - kind: operator - operator: ">" - ET: - short_name: - long_name: - definition: - comment: "count of days when daily mean temperature is above or equal {TT} degC" - - tmle{TT}: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "tmle{TT}" - standard_name: number_of_days_with_air_temperature_below_threshold - proposed_standard_name: number_of_occurrences_with_air_temperature_at_or_below_threshold - long_name: "Number of days with Tmean <= {TT}C" - units: "1" - cell_methods: - - time: mean within days - - time: sum over days - input: - data: tas - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: {TT} - units: "degree_Celsius" - condition: - kind: operator - operator: "<" - ET: - short_name: - long_name: - definition: - comment: "count of days when daily mean temperature is below or equal{TT} degC" - - ctngt{TT}: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "ctngt{TT}" - standard_name: spell_length_of_days_with_air_temperature_above_threshold - proposed_standard_name: spell_length_with_air_temperature_above_threshold - long_name: "Maximum number of consequtive days with Tmin > {TT}C" - units: "day" - cell_methods: - - time: minimum within days - - time: maximum over days - input: - data: tasmin - index_function: - name: spell_length - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: {TT} - units: "degree_Celsius" - condition: - kind: operator - operator: ">" - reducer: - kind: reducer - reducer: max - ET: - short_name: - long_name: - definition: - comment: "count of days when daily minimum temperature is above {TT} degC" - - cfd: - reference: ECA&D - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "cfd" - standard_name: spell_length_of_days_with_air_temperature_below_threshold - proposed_standard_name: spell_length_with_air_temperature_below_threshold - long_name: "Maximum number of consecutive frost days (Tmin < 0 C)" - units: "day" - cell_methods: - - time: minimum within days - - time: maximum over days - input: - data: tasmin - index_function: - name: 
count_occurrences - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: 0 - units: "degree_Celsius" - condition: - kind: operator - operator: "<" - ET: - short_name: "cfd" - long_name: - definition: - comment: - - csu: - reference: ECA&D - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "csu" - standard_name: spell_length_of_days_with_air_temperature_above_threshold - proposed_standard_name: spell_length_with_air_temperature_above_threshold - long_name: "Maximum number of consecutive summer days (Tmax >25 C)" - units: "day" - cell_methods: - - time: maximum within days - - time: maximum over days - input: - data: tasmax - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: 25 - units: "degree_Celsius" - condition: - kind: operator - operator: ">" - ET: - short_name: "cfd" - long_name: - definition: - comment: - - ctnlt{TT}: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "ctnlt{TT}" - standard_name: spell_length_of_days_with_air_temperature_below_threshold - proposed_standard_name: spell_length_with_air_temperature_below_threshold - long_name: "Maximum number of consequtive days with Tmin < {TT}C" - units: "day" - cell_methods: - - time: minimum within days - - time: maximum over days - input: - data: tasmin - index_function: - name: spell_length - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: {TT} - units: "degree_Celsius" - condition: - kind: operator - operator: "<" - reducer: - kind: reducer - reducer: max - ET: - short_name: - long_name: - definition: - comment: "count of days when daily minimum temperature is below {TT} degC" - - ctnge{TT}: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "ctnge{TT}" - standard_name: spell_length_of_days_with_air_temperature_above_threshold - proposed_standard_name: spell_length_with_air_temperature_at_or_above_threshold - long_name: "Maximum number of consequtive days with Tmin >= {TT}C" - units: "day" - cell_methods: - - time: minimum within days - - time: maximum over days - input: - data: tasmin - index_function: - name: spell_length - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: {TT} - units: "degree_Celsius" - condition: - kind: operator - operator: ">" - reducer: - kind: reducer - reducer: max - ET: - short_name: - long_name: - definition: - comment: "count of days when daily minimum temperature is above or equal {TT} degC" - - ctnle{TT}: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "ctnle{TT}" - standard_name: spell_length_of_days_with_air_temperature_below_threshold - proposed_standard_name: spell_length_with_air_temperature_at_or_below_threshold - long_name: "Maximum number of consequtive days with Tmin <= {TT}C" - units: "day" - cell_methods: - - time: minimum within days - - time: maximum over days - input: - data: tasmin - index_function: - name: spell_length - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: {TT} - units: "degree_Celsius" - condition: - kind: operator - operator: "<" - reducer: - kind: reducer - reducer: max - ET: - short_name: - long_name: - definition: - comment: "count of days when daily minimum temperature is below or equal{TT} degC" - - ctxgt{TT}: - reference: CLIPC - 
period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "ctxgt{TT}" - standard_name: spell_length_of_days_with_air_temperature_above_threshold - proposed_standard_name: spell_length_with_air_temperature_above_threshold - long_name: "Maximum number of consequtive days with Tmax > {TT}C" - units: "day" - cell_methods: - - time: maximum within days - - time: maximum over days - input: - data: tasmax - index_function: - name: spell_length - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: {TT} - units: "degree_Celsius" - condition: - kind: operator - operator: ">" - reducer: - kind: reducer - reducer: max - ET: - short_name: - long_name: - definition: - comment: "count of days when daily maximum temperature is above {TT} degC" - - ctxlt{TT}: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "ctxlt{TT}" - standard_name: spell_length_of_days_with_air_temperature_below_threshold - proposed_standard_name: spell_length_with_air_temperature_below_threshold - long_name: "Maximum number of consequtive days with Tmax < {TT}C" - units: "day" - cell_methods: - - time: maximum within days - - time: maximum over days - input: - data: tasmax - index_function: - name: spell_length - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: {TT} - units: "degree_Celsius" - condition: - kind: operator - operator: "<" - reducer: - kind: reducer - reducer: max - ET: - short_name: - long_name: - definition: - comment: "count of days when daily maximum temperature is below {TT} degC" - - ctxge{TT}: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "ctxge{TT}" - standard_name: spell_length_of_days_with_air_temperature_above_threshold - proposed_standard_name: spell_length_with_air_temperature_at_or_above_threshold - long_name: "Maximum number of consequtive days with Tmax >= {TT}C" - units: "day" - cell_methods: - - time: maximum within days - - time: maximum over days - input: - data: tasmax - index_function: - name: spell_length - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: {TT} - units: "degree_Celsius" - condition: - kind: operator - operator: ">" - reducer: - kind: reducer - reducer: max - ET: - short_name: - long_name: - definition: - comment: "count of days when daily maximum temperature is above or equal {TT} degC" - - ctxle{TT}: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "ctxle{TT}" - standard_name: spell_length_of_days_with_air_temperature_below_threshold - proposed_standard_name: spell_length_with_air_temperature_at_or_below_threshold - long_name: "Maximum number of consequtive days with Tmax <= {TT}C" - units: "day" - cell_methods: - - time: maximum within days - - time: maximum over days - input: - data: tasmax - index_function: - name: spell_length - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: {TT} - units: "degree_Celsius" - condition: - kind: operator - operator: "<" - reducer: - kind: reducer - reducer: max - ET: - short_name: - long_name: - definition: - comment: "count of days when daily maximum temperature is below or equal {TT} degC" - - ctmgt{TT}: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "ctmgt{TT}" - standard_name: 
spell_length_of_days_with_air_temperature_above_threshold - proposed_standard_name: spell_length_with_air_temperature_above_threshold - long_name: "Maximum number of consequtive days with Tmean > {TT}C" - units: "day" - cell_methods: - - time: mean within days - - time: maximum over days - input: - data: tas - index_function: - name: spell_length - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: {TT} - units: "degree_Celsius" - condition: - kind: operator - operator: ">" - reducer: - kind: reducer - reducer: max - ET: - short_name: - long_name: - definition: - comment: "count of days when daily mean temperature is above {TT} degC" - - ctmlt{TT}: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "ctmlt{TT}" - standard_name: spell_length_of_days_with_air_temperature_below_threshold - proposed_standard_name: spell_length_with_air_temperature_below_threshold - long_name: "Maximum number of consequtive days with Tmean < {TT}C" - units: "day" - cell_methods: - - time: mean within days - - time: maximum over days - input: - data: tas - index_function: - name: spell_length - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: {TT} - units: "degree_Celsius" - condition: - kind: operator - operator: "<" - reducer: - kind: reducer - reducer: max - ET: - short_name: - long_name: - definition: - comment: "count of days when daily mean temperature is below {TT} degC" - - ctmge{TT}: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "ctmge{TT}" - standard_name: spell_length_of_days_with_air_temperature_above_threshold - proposed_standard_name: spell_length_with_air_temperature_at_or_above_threshold - long_name: "Maximum number of consequtive days with Tmean >= {TT}C" - units: "day" - cell_methods: - - time: mean within days - - time: maximum over days - input: - data: tas - index_function: - name: spell_length - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: {TT} - units: "degree_Celsius" - condition: - kind: operator - operator: ">" - reducer: - kind: reducer - reducer: max - ET: - short_name: - long_name: - definition: - comment: "count of days when daily mean temperature is above or equal {TT} degC" - - ctmle{TT}: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "ctmle{TT}" - standard_name: spell_length_of_days_with_air_temperature_below_threshold - proposed_standard_name: spell_length_with_air_temperature_at_or_below_threshold - long_name: "Maximum number of consequtive days with Tmean <= {TT}C" - units: "day" - cell_methods: - - time: mean within days - - time: maximum over days - input: - data: tas - index_function: - name: spell_length - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: {TT} - units: "degree_Celsius" - condition: - kind: operator - operator: "<" - reducer: - kind: reducer - reducer: max - ET: - short_name: - long_name: - definition: - comment: "count of days when daily mean temperature is below or equal{TT} degC" - - txx: - reference: ECAD - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "txx" - standard_name: air_temperature - long_name: "Maximum daily maximum temperature" - units: "degree_Celsius" - cell_methods: - - time: maximum within days - - time: maximum over days - input: - data: tasmax - index_function: - 
name: statistics - parameters: - reducer: - kind: reducer - reducer: max - ET: - short_name: "txx" - long_name: "Maximum daily maximum temperature" - definition: "Maximum value of daily TX" - comment: "maximum of daily maximum temperature" - - tnx: - reference: ECAD - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "tnx" - standard_name: air_temperature - long_name: "Maximum daily minimum temperature" - units: "degree_Celsius" - cell_methods: - - time: minimum within days - - time: maximum over days - input: - data: tasmin - index_function: - name: statistics - parameters: - reducer: - kind: reducer - reducer: max - ET: - short_name: "tnx" - long_name: "Maximum daily minimum temperature" - definition: "Maximum value of daily TN" - comment: "maximum of daily minimum temperature" - - txn: - reference: ECAD - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "txn" - standard_name: air_temperature - long_name: "Minimum daily maximum temperature" - units: "degree_Celsius" - cell_methods: - - time: maximum within days - - time: minimum over days - input: - data: tasmax - index_function: - name: statistics - parameters: - reducer: - kind: reducer - reducer: min - ET: - short_name: "txn" - long_name: "Minimum daily maximum temperature" - definition: "Minimum value of daily TX" - comment: "minimum of daily maximum temperature" - - tnn: - reference: ECAD - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "tnn" - standard_name: air_temperature - long_name: "Minimum daily minimum temperature" - units: "degree_Celsius" - cell_methods: - - time: minimum within days - - time: minimum over days - input: - data: tasmin - index_function: - name: statistics - parameters: - reducer: - kind: reducer - reducer: min - ET: - short_name: "tnn" - long_name: "Minimum daily minimum temperature" - definition: "Minimum value of daily TN" - comment: "minimum of daily minimum temperature" - - txm: - reference: - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "txm" - standard_name: air_temperature - long_name: "Mean daily maximum temperature" - units: "degree_Celsius" - cell_methods: - - time: maximum within days - - time: mean over days - input: - data: tasmax - index_function: - name: statistics - parameters: - reducer: - kind: reducer - reducer: mean - ET: - short_name: "txm" - long_name: "Mean daily maximum temperature" - definition: "Mean value of daily TX" - comment: - - tnm: - reference: - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "tnm" - standard_name: air_temperature - long_name: "Mean daily minimum temperature" - units: "degree_Celsius" - cell_methods: - - time: minimum within days - - time: mean over days - input: - data: tasmin - index_function: - name: statistics - parameters: - reducer: - kind: reducer - reducer: mean - ET: - short_name: "tnx" - long_name: "Mean daily minimum temperature" - definition: "Mean value of daily TN" - comment: - - tmx: - reference: - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "tmx" - standard_name: air_temperature - long_name: "Maximum daily mean temperature" - units: "degree_Celsius" - cell_methods: - - time: mean within days - - time: maximum over days - input: - data: tas - index_function: - name: statistics - parameters: - reducer: - kind: reducer - reducer: max - ET: - short_name: "tmx" - long_name: "Maximum 
daily mean temperature" - definition: "Maximum value of daily TM" - comment: - - tmn: - reference: - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "tmn" - standard_name: air_temperature - long_name: "Minimum daily mean temperature" - units: "degree_Celsius" - cell_methods: - - time: mean within days - - time: minimum over days - input: - data: tas - index_function: - name: statistics - parameters: - reducer: - kind: reducer - reducer: min - ET: - short_name: "tmn" - long_name: "Minimum daily mean temperature" - definition: "Minimum value of daily TM" - comment: - - tmm: - reference: - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "tmm" - standard_name: air_temperature - long_name: "Mean daily mean temperature" - units: "degree_Celsius" - cell_methods: - - time: mean within days - - time: mean over days - input: - data: tas - index_function: - name: statistics - parameters: - reducer: - kind: reducer - reducer: mean - ET: - short_name: "tmm" - long_name: "Mean daily mean temperature" - definition: "Mean value of daily TM" - comment: - - txmax: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "txmax" - standard_name: air_temperature - long_name: "Maximum daily maximum temperature" - units: "degree_Celsius" - cell_methods: - - time: maximum within days - - time: maximum over days - input: - data: tasmax - index_function: - name: statistics - parameters: - reducer: - kind: reducer - reducer: max - ET: - short_name: - long_name: - definition: - comment: "mean of daily maximum temperature" - - tnmax: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "tnmax" - standard_name: air_temperature - long_name: "Maximum daily minimum temperature" - units: "degree_Celsius" - cell_methods: - - time: minimum within days - - time: maximum over days - input: - data: tasmin - index_function: - name: statistics - parameters: - reducer: - kind: reducer - reducer: max - ET: - short_name: - long_name: - definition: - comment: "mean of daily minimum temperature" - - txmin: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "txmin" - standard_name: air_temperature - long_name: "Minimum daily maximum temperature" - units: "degree_Celsius" - cell_methods: - - time: maximum within days - - time: minimum over days - input: - data: tasmax - index_function: - name: statistics - parameters: - reducer: - kind: reducer - reducer: min - ET: - short_name: - long_name: - definition: - comment: "maximum of daily mean temperature" - - tnmin: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "tnmin" - standard_name: air_temperature - long_name: "Minimum daily minimum temperature" - units: "degree_Celsius" - cell_methods: - - time: minimum within days - - time: minimum over days - input: - data: tasmin - index_function: - name: statistics - parameters: - reducer: - kind: reducer - reducer: min - ET: - short_name: - long_name: - definition: - comment: "minimum of daily mean temperature" - - txmean: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "txmean" - standard_name: air_temperature - long_name: "Mean daily maximum temperature" - units: "degree_Celsius" - cell_methods: - - time: maximum within days - - time: mean over days - 
input: - data: tasmax - index_function: - name: statistics - parameters: - reducer: - kind: reducer - reducer: mean - ET: - short_name: - long_name: - definition: - comment: "mean of daily maximum temperature" - - tnmean: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "tnmean" - standard_name: air_temperature - long_name: "Mean daily minimum temperature" - units: "degree_Celsius" - cell_methods: - - time: minimum within days - - time: mean over days - input: - data: tasmin - index_function: - name: statistics - parameters: - reducer: - kind: reducer - reducer: mean - ET: - short_name: - long_name: - definition: - comment: "mean of daily minimum temperature" - - tmmax: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "tmmax" - standard_name: air_temperature - long_name: "Maximum daily mean temperature" - units: "degree_Celsius" - cell_methods: - - time: mean within days - - time: maximum over days - input: - data: tas - index_function: - name: statistics - parameters: - reducer: - kind: reducer - reducer: max - ET: - short_name: - long_name: - definition: - comment: "maximum of daily mean temperature" - - tmmin: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "tmmin" - standard_name: air_temperature - long_name: "Minimum daily mean temperature" - units: "degree_Celsius" - cell_methods: - - time: mean within days - - time: maximum over days - input: - data: tas - index_function: - name: statistics - parameters: - reducer: - kind: reducer - reducer: min - ET: - short_name: - long_name: - definition: - comment: "minimum of daily mean temperature" - - tmmean: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "tmmean" - standard_name: air_temperature - long_name: "Mean daily mean temperature" - units: "degree_Celsius" - cell_methods: - - time: mean within days - - time: mean over days - input: - data: tas - index_function: - name: statistics - parameters: - reducer: - kind: reducer - reducer: mean - ET: - short_name: - long_name: - definition: - comment: "mean of daily mean temperature" - - tn10p: - # modified in icclim - reference: ECAD - period: - allowed: - annual: - default: annual - output: - var_name: "tn10p" - standard_name: - long_name: "Days when Tmin < 10th percentile" - units: "1" - cell_methods: - input: - data: tasmin - index_function: - name: count_percentile_occurrences - parameters: - percentile: - kind: quantity - standard_name: - proposed_standard_name: quantile - data: 10 - units: "%" - condition: - kind: operator - operator: "<" - ET: - short_name: "tn10p" - long_name: "WMO No.1500: Cold nights (count of days)" - definition: "Number of days when TN < 10th percentile" - comment: - - tx10p: - # modified in icclim - reference: ECAD - period: - allowed: - annual: - default: annual - output: - var_name: "tx10p" - standard_name: - long_name: "Days when Tmax < 10th percentile" - units: "1" - cell_methods: - input: - data: tasmax - index_function: - name: count_percentile_occurrences - parameters: - percentile: - kind: quantity - standard_name: - proposed_standard_name: quantile - data: 10 - units: "%" - condition: - kind: operator - operator: "<" - ET: - short_name: "tx10p" - long_name: "WMO No.1500: Cold day-times (count of days)" - definition: "Number of days when TX < 10th percentile" - comment: - - tn90p: - # modified in 
icclim - reference: ECAD - period: - allowed: - annual: - default: annual - output: - var_name: "tn90p" - standard_name: - long_name: "Days when Tmin > 90th percentile" - units: "1" - cell_methods: - input: - data: tasmin - index_function: - name: count_percentile_occurrences - parameters: - percentile: - kind: quantity - standard_name: - proposed_standard_name: quantile - data: 90 - units: "%" - condition: - kind: operator - operator: ">" - ET: - short_name: "tn90p" - long_name: "WMO No.1500: Warm nights (count of days)" - definition: "Number of days when TN > 90th percentile" - comment: - - tx90p: - # modified in icclim - reference: ECAD - period: - allowed: - annual: - default: annual - output: - var_name: "tx90p" - standard_name: - long_name: "Days when Tmax > 90th daily percentile" - units: "1" - cell_methods: - input: - data: tasmax - index_function: - name: count_percentile_occurrences - parameters: - percentile: - kind: quantity - standard_name: - proposed_standard_name: quantile - data: 90 - units: "1" - condition: - kind: operator - operator: ">" - ET: - short_name: "tx90p" - long_name: "WMO No.1500: Warm day-times (count of days)" - definition: "Number of days when TX > 90th percentile" - comment: - - tg10p: - # modified in icclim - reference: ECA&D - period: - allowed: - annual: - default: annual - output: - var_name: "tg10p" - standard_name: - long_name: "Days when Tmean < 10th percentile" - units: "1" - cell_methods: - input: - data: tas - index_function: - name: count_percentile_occurrences - parameters: - percentile: - kind: quantity - standard_name: - proposed_standard_name: quantile - data: 10 - units: "%" - condition: - kind: operator - operator: "<" - ET: - short_name: "tg10p" - long_name: - definition: - comment: - - tg90p: - # modified in icclim - reference: ECA&D - period: - allowed: - annual: - default: annual - output: - var_name: "tg90p" - standard_name: - long_name: "Days when Tmean > 90th percentile" - units: "1" - cell_methods: - input: - data: tas - index_function: - name: count_percentile_occurrences - parameters: - percentile: - kind: quantity - standard_name: - proposed_standard_name: quantile - data: 90 - units: "%" - condition: - kind: operator - operator: ">" - ET: - short_name: "tg90p" - long_name: - definition: - comment: - - txgt50p: - reference: ET-SCI - period: - allowed: - annual: - default: annual - output: - var_name: "txgt50p" - standard_name: - long_name: "Percentage of days when Tmax > 50th percentile" - units: "%" - cell_methods: - input: - data: tasmax - index_function: - name: count_percentile_occurrences - parameters: - percentile: - kind: quantity - standard_name: - proposed_standard_name: quantile - data: 50 - units: "%" - condition: - kind: operator - operator: ">" - ET: - short_name: "tx50p" - long_name: "Above average days" - definition: "Number of days where TX > 50th percentile" - comment: - - txgt{PRC}p: - reference: CLIPC - period: - allowed: - annual: - default: annual - output: - var_name: "txgt{PRC}p" - standard_name: - long_name: "Percentage of days when Tmax > {PRC}th percentile" - units: "%" - cell_methods: - input: - data: tasmax - index_function: - name: count_percentile_occurrences - parameters: - percentile: - kind: quantity - standard_name: - proposed_standard_name: quantile - data: {PRC} - units: "%" - condition: - kind: operator - operator: ">" - ET: - short_name: - long_name: - definition: - comment: - - tngt{PRC}p: - reference: CLIPC - period: - allowed: - annual: - default: annual - output: - var_name: "tngt{PRC}p" 
- standard_name: - long_name: "Percentage of days when Tmin > {PRC}th percentile" - units: "%" - cell_methods: - input: - data: tasmin - index_function: - name: count_percentile_occurrences - parameters: - percentile: - kind: quantity - standard_name: - proposed_standard_name: quantile - data: {PRC} - units: "%" - condition: - kind: operator - operator: ">" - ET: - short_name: - long_name: - definition: - comment: - - tmgt{PRC}p: - reference: CLIPC - period: - allowed: - annual: - default: annual - output: - var_name: "tmgt{PRC}p" - standard_name: - long_name: "Percentage of days when Tmean > {PRC}th percentile" - units: "%" - cell_methods: - input: - data: tas - index_function: - name: count_percentile_occurrences - parameters: - percentile: - kind: quantity - standard_name: - proposed_standard_name: quantile - data: {PRC} - units: "%" - condition: - kind: operator - operator: ">" - ET: - short_name: - long_name: - definition: - comment: - - txlt{PRC}p: - reference: CLIPC - period: - allowed: - annual: - default: annual - output: - var_name: "txlt{PRC}p" - standard_name: - long_name: "Percentage of days when Tmax < {PRC}th percentile" - units: "%" - cell_methods: - input: - data: tasmax - index_function: - name: count_percentile_occurrences - parameters: - percentile: - kind: quantity - standard_name: - proposed_standard_name: quantile - data: {PRC} - units: "%" - condition: - kind: operator - operator: "<" - ET: - short_name: - long_name: - definition: - comment: - - tnlt{PRC}p: - reference: CLIPC - period: - allowed: - annual: - default: annual - output: - var_name: "tnlt{PRC}p" - standard_name: - long_name: "Percentage of days when Tmin < {PRC}th percentile" - units: "%" - cell_methods: - input: - data: tasmin - index_function: - name: count_percentile_occurrences - parameters: - percentile: - kind: quantity - standard_name: - proposed_standard_name: quantile - data: {PRC} - units: "%" - condition: - kind: operator - operator: "<" - ET: - short_name: - long_name: - definition: - comment: - - tmlt{PRC}p: - reference: CLIPC - period: - allowed: - annual: - default: annual - output: - var_name: "tmlt{PRC}p" - standard_name: - long_name: "Percentage of days when Tmean < {PRC}th percentile" - units: "%" - cell_methods: - input: - data: tas - index_function: - name: count_percentile_occurrences - parameters: - percentile: - kind: quantity - standard_name: - proposed_standard_name: quantile - data: {PRC} - units: "%" - condition: - kind: operator - operator: "<" - ET: - short_name: - long_name: - definition: - comment: - - dtr: - reference: ECAD - period: - allowed: - annual: - seasonal: - monthly: - default: monthly - output: - var_name: "dtr" - standard_name: - proposed_standard_name: air_temperature_range - long_name: "Mean Diurnal Temperature Range" - units: "degree_Celsius" - cell_methods: - - time: range within days - - time: mean over days - input: - low_data: tasmin - high_data: tasmax - index_function: - name: diurnal_temperature_range - parameters: - ET: - short_name: "dtr" - long_name: "Daily temperature range" - definition: "Monthly mean difference between TX and TN" - comment: "mean of daily temperature range" - - vdtr: - reference: ECA&D - period: - allowed: - annual: - seasonal: - monthly: - default: monthly - output: - var_name: "vdtr" - standard_name: - proposed_standard_name: air_temperature_difference - long_name: "Mean day-to-day variation in Diurnal Temperature Range" - units: "degree_Celsius" - cell_methods: - input: - low_data: tasmin - high_data: tasmax - 
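
Note the unit split within this family: tn90p and its siblings report a plain day count (units "1"), while txgt{PRC}p, tngt{PRC}p and tmgt{PRC}p report a percentage of days. Reusing the boolean mask from the sketch above, the conversion is just a division by the number of days in each period (illustrative only):

    count = mask.resample(time="YS").sum()
    total = mask.resample(time="YS").count()  # days present in each year
    percent = 100 * count / total             # units "%"
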
index_function: - name: interday_diurnal_temperature_range - parameters: - ET: - short_name: - long_name: - definition: - comment: - - etr: - reference: ECA&D - period: - allowed: - annual: - seasonal: - monthly: - default: monthly - output: - var_name: "etr" - standard_name: - proposed_standard_name: air_temperature_range - long_name: "Intra-period extreme temperature range" - units: "degree_Celsius" - cell_methods: - - time: range - input: - low_data: tasmin - high_data: tasmax - index_function: - name: extreme_temperature_range - parameters: - ET: - short_name: - long_name: - definition: - comment: - - tx{PRC}pctl: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "tx{PRC}pctl" - standard_name: air_temperature - long_name: "{PRC}th percentile of Tmax" - units: "degree_Celsius" - cell_methods: - input: - data: tasmax - index_function: - name: percentile - parameters: - percentiles: - kind: quantity - standard_name: - proposed_standard_name: quantile - data: {PRC} - units: "%" - ET: - short_name: - long_name: - definition: - comment: - - tn{PRC}pctl: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "tn{PRC}pctl" - standard_name: air_temperature - long_name: "{PRC}th percentile of Tmin" - units: "degree_Celsius" - cell_methods: - input: - data: tasmin - index_function: - name: percentile - parameters: - percentiles: - kind: quantity - standard_name: - proposed_standard_name: quantile - data: {PRC} - units: "%" - ET: - short_name: - long_name: - definition: - comment: - - tm{PRC}pctl: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "tm{PRC}pctl" - standard_name: air_temperature - long_name: "{PRC}th percentile of Tmean" - units: "degree_Celsius" - cell_methods: - input: - data: tas - index_function: - name: percentile - parameters: - percentiles: - kind: quantity - standard_name: - proposed_standard_name: quantile - data: {PRC} - units: "%" - ET: - short_name: - long_name: - definition: - comment: - - hd17: - reference: ECA&D - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "hd17" - standard_name: integral_wrt_time_of_air_temperature_excess - long_name: "Heating degree days (sum of Tmean < 17 C)" - units: "degree_Celsius day" - cell_methods: - - time: mean within days - - time: sum over days - input: - data: tas - index_function: - name: temperature_sum - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: 17 - units: "degree_Celsius" - condition: - kind: operator - operator: "<" - ET: - short_name: - long_name: - definition: - comment: - - hddheat{TT}: - reference: ET-SCI - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "hddheat{TT}" - standard_name: integral_wrt_time_of_air_temperature_deficit - long_name: "Heating Degree Days (Tmean < {TT}C)" - units: "degree_Celsius day" - cell_methods: - - time: mean within days - - time: sum over days - input: - data: tas - index_function: - name: temperature_sum - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: {TT} - units: "degree_Celsius" - condition: - kind: operator - operator: "<" - ET: - short_name: "hddheat" - long_name: "Heating degree days" - definition: "Sum of Tb- TM (where Tb is a user- defined location-specific base temperature and TM < Tb)" - comment: - - ddgt{TT}: - reference: CLIPC - period: 
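
The three range indices here (dtr, vdtr, etr) combine tasmin and tasmax rather than thresholding a single variable. A minimal sketch of the three index_function behaviours on daily data, with illustrative names:

    import xarray as xr

    def temperature_ranges(tasmin: xr.DataArray, tasmax: xr.DataArray, freq: str = "MS"):
        rng = tasmax - tasmin
        dtr = rng.resample(time=freq).mean()                     # mean diurnal range
        vdtr = abs(rng.diff("time")).resample(time=freq).mean()  # day-to-day variation
        etr = tasmax.resample(time=freq).max() - tasmin.resample(time=freq).min()
        return dtr, vdtr, etr
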
- allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "ddgt{TT}" - standard_name: integral_wrt_time_of_air_temperature_excess - long_name: "Degree Days (Tmean > {TT}C)" - units: "degree_Celsius day" - cell_methods: - - time: mean within days - - time: sum over days - input: - data: tas - index_function: - name: temperature_sum - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: {TT} - units: "degree_Celsius" - condition: - kind: operator - operator: ">" - ET: - short_name: - long_name: - definition: - comment: - - cddcold{TT}: - reference: ET-SCI - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "cddcold{TT}" - standard_name: integral_wrt_time_of_air_temperature_excess - long_name: "Cooling Degree Days (Tmean > {TT}C)" - units: "degree_Celsius day" - cell_methods: - - time: mean within days - - time: sum over days - input: - data: tas - index_function: - name: temperature_sum - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: {TT} - units: "degree_Celsius" - condition: - kind: operator - operator: ">" - ET: - short_name: "cddcold" - long_name: "Cooling degree days" - definition: "Sum of TM - Tb (where Tb is a user- defined location-specific base temperature and TM > Tb)" - comment: - - ddlt{TT}: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "ddlt{TT}" - standard_name: integral_wrt_time_of_air_temperature_deficit - long_name: "Degree Days (Tmean < {TT}C)" - units: "degree_Celsius day" - cell_methods: - - time: mean within days - - time: sum over days - input: - data: tas - index_function: - name: temperature_sum - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: {TT} - units: "degree_Celsius" - condition: - kind: operator - operator: "<" - ET: - short_name: - long_name: - definition: - comment: - - gddgrow{TT}: - reference: ET-SCI - period: - allowed: - annual: - default: annual - output: - var_name: "gddgrow{TT}" - standard_name: integral_wrt_time_of_air_temperature_excess - long_name: "Annual Growing Degree Days (Tmean > {TT}C)" - units: "degree_Celsius day" - cell_methods: - - time: mean within days - - time: sum over days - input: - data: tas - index_function: - name: temperature_sum - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: {TT} - units: "degree_Celsius" - condition: - kind: operator - operator: ">" - ET: - short_name: "gddgrow" - long_name: "Growing degree days" - definition: "Annual sum of TM - Tb (where Tb is a user- defined location-specific base temperature and TM >Tb)" - comment: - - gd4: - reference: ECA&D - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "gd4" - standard_name: integral_wrt_time_of_air_temperature_excess - long_name: "Growing degree days (sum of Tmean > 4 C)" - units: "degree_Celsius day" - cell_methods: - - time: mean within days - - time: sum over days - input: - data: tas - index_function: - name: temperature_sum - parameters: - threshold: - kind: quantity - standard_name: air_temperature - data: 4 - units: "degree_Celsius" - condition: - kind: operator - operator: ">" - ET: - short_name: - long_name: - definition: - comment: - - r10mm: - reference: ECAD - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "r10mm" - standard_name: 
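
hd17, hddheat{TT}, ddgt{TT}, cddcold{TT}, ddlt{TT}, gddgrow{TT} and gd4 all reduce to the same temperature_sum operation: integrate the daily excess (or deficit) of Tmean relative to a base temperature. A hedged sketch, assuming tas is daily mean temperature in degree_Celsius:

    import xarray as xr

    def temperature_sum(tas: xr.DataArray, base: float, above: bool, freq: str = "YS") -> xr.DataArray:
        delta = (tas - base) if above else (base - tas)
        return delta.clip(min=0).resample(time=freq).sum()  # units: degree_Celsius day

    # gd4  ~ temperature_sum(tas, 4, above=True)
    # hd17 ~ temperature_sum(tas, 17, above=False)
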
number_of_days_with_lwe_thickness_of_precipitation_amount_above_threshold - proposed_standard_name: number_of_occurrences_with_lwe_thickness_of_precipitation_amount_at_or_above_threshold - long_name: "Number of heavy precipitation days (Precip >=10mm)" - units: "1" - cell_methods: - - time: sum within days - - time: sum over days - input: - data: pr - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: lwe_precipitation_rate - data: 10 - units: "mm day-1" - condition: - kind: operator - operator: ">" - ET: - short_name: "r10mm" - long_name: "Number of heavy precipitation days" - definition: "Count of days when P>=10mm" - comment: "count of days when daily total precipitation is above 10 mm" - - r20mm: - reference: ECAD - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "r20mm" - standard_name: number_of_days_with_lwe_thickness_of_precipitation_amount_above_threshold - proposed_standard_name: number_of_occurrences_with_lwe_thickness_of_precipitation_amount_at_or_above_threshold - long_name: "Number of very heavy precipitation days (Precip >= 20mm)" - units: "1" - cell_methods: - - time: sum within days - - time: sum over days - input: - data: pr - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: lwe_precipitation_rate - data: 20 - units: "mm day-1" - condition: - kind: operator - operator: ">" - ET: - short_name: "r20mm" - long_name: "Number of very heavy precipitation days" - definition: "Count of days when P>=20mm" - comment: "count of days when daily total precipitation is above 20 mm" - - r{RT}mm: - reference: ECAD - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "r{RT}mm" - standard_name: number_of_days_with_lwe_thickness_of_precipitation_amount_above_threshold - proposed_standard_name: number_of_occurrences_with_lwe_thickness_of_precipitation_amount_at_or_above_threshold - long_name: "Number of days with daily Precip >= {RT}mm)" - units: "1" - cell_methods: - - time: sum within days - - time: sum over days - input: - data: pr - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: lwe_precipitation_rate - data: {RT} - units: "mm day-1" - condition: - kind: operator - operator: ">" - ET: - short_name: "rnnmm" - long_name: "Number of days above a user-defined threshold" - definition: - comment: "count of days when daily total precipitation is above X mm" - - wetdays: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "wetdays" - standard_name: number_of_days_with_lwe_thickness_of_precipitation_amount_above_threshold - proposed_standard_name: number_of_occurrences_with_lwe_thickness_of_precipitation_amount_at_or_above_threshold - long_name: "Number of Wet Days (precip >= 1 mm)" - units: "1" - cell_methods: - - time: sum within days - - time: sum over days - input: - data: pr - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: lwe_precipitation_rate - long_name: "Wet day threshold" - data: 1 - units: "mm day-1" - condition: - kind: operator - operator: ">" - ET: - short_name: - long_name: - definition: - comment: - - rr1: - reference: ECA&D - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "rr1" - standard_name: number_of_days_with_lwe_thickness_of_precipitation_amount_above_threshold - 
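
r10mm, r20mm, r{RT}mm, wetdays and rr1 are all count_occurrences over daily precipitation. One detail worth flagging: the YAML encodes operator ">" while the long_name and the ET definition say ">="; the sketch below follows the long_name. It assumes pr is already in mm/day (icclim converts units first):

    import xarray as xr

    def count_precip_days(pr: xr.DataArray, thresh_mm: float, freq: str = "YS") -> xr.DataArray:
        return (pr >= thresh_mm).resample(time=freq).sum()  # e.g. thresh_mm=10 for r10mm
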
proposed_standard_name: number_of_occurrences_with_lwe_thickness_of_precipitation_amount_at_or_above_threshold - long_name: "Number of Wet Days (precip >= 1 mm)" - units: "1" - cell_methods: - - time: sum within days - - time: sum over days - input: - data: pr - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: lwe_precipitation_rate - long_name: "Wet day threshold" - data: 1 - units: "mm day-1" - condition: - kind: operator - operator: ">" - ET: - short_name: - long_name: - definition: - comment: - - cdd: - reference: ECAD - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "cdd" - standard_name: spell_length_of_days_with_lwe_thickness_of_precipitation_amount_below_threshold - proposed_standard_name: spell_length_with_lwe_thickness_of_precipitation_amount_below_threshold - long_name: "Maximum consecutive dry days (Precip < 1mm)" - units: "day" - cell_methods: - - time: sum within days - - time: sum over days - input: - data: pr - index_function: - name: spell_length - parameters: - threshold: - kind: quantity - standard_name: lwe_precipitation_rate - long_name: "Wet day threshold" - data: 1 - units: "mm day-1" - condition: - kind: operator - operator: "<" - reducer: - kind: reducer - reducer: max - ET: - short_name: "cdd" - long_name: "Consecutive dry days" - definition: "Maximum number of consecutive days with P<1mm" - comment: "maximum consecutive days when daily total precipitation is below 1 mm" - - cwd: - reference: ECAD - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "cwd" - standard_name: spell_length_of_days_with_lwe_thickness_of_precipitation_amount_above_threshold - proposed_standard_name: spell_length_with_lwe_thickness_of_precipitation_amount_at_or_above_threshold - long_name: "Maximum consecutive wet days (Precip >= 1mm)" - units: "day" - cell_methods: - - time: sum within days - - time: sum over days - input: - data: pr - index_function: - name: spell_length - parameters: - threshold: - kind: quantity - standard_name: lwe_precipitation_rate - long_name: "Wet day threshold" - data: 1 - units: "mm day-1" - condition: - kind: operator - operator: ">" - reducer: - kind: reducer - reducer: max - ET: - short_name: "cwd" - long_name: "Consecutive wet days" - definition: "Maximum number of consecutive days with P>=1mm" - comment: "maximum consecutive days when daily total precipitation is at least 1 mm" - - prcptot: - reference: ECAD - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "prcptot" - standard_name: lwe_thickness_of_precipitation_amount - long_name: "Total precipitation during Wet Days" - units: "mm" - cell_methods: - - time: sum within days - - time: mean over days - input: - data: pr - index_function: - name: thresholded_statistics - parameters: - threshold: - kind: quantity - standard_name: lwe_precipitation_rate - long_name: "Wet day threshold" - data: 1 - units: "mm day-1" - condition: - kind: operator - operator: ">" - reducer: - kind: reducer - reducer: sum - ET: - short_name: "prcptot" - long_name: "Total wet- day precipitation" - definition: "PRCP from wet days (P>=1mm)" - comment: "sum of total daily precipitation during days having at least 1 mm" - - sdii: - reference: ECAD - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "sdii" - standard_name: lwe_precipitation_rate - long_name: "Average precipitation during Wet Days (SDII)" - units: 
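
cdd and cwd use spell_length with reducer max: the longest run of days on one side of the wet-day threshold. A minimal 1-D sketch (illustrative; the real implementation is vectorised over all grid cells):

    import numpy as np

    def max_spell_length(mask: np.ndarray) -> int:
        """Longest run of True, e.g. mask = pr_mm < 1.0 for cdd, pr_mm >= 1.0 for cwd."""
        best = run = 0
        for hit in mask:
            run = run + 1 if hit else 0
            best = max(best, run)
        return best
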
"mm day-1" - cell_methods: - - time: sum within days - - time: mean over days - input: - data: pr - index_function: - name: thresholded_statistics - parameters: - threshold: - kind: quantity - standard_name: lwe_precipitation_rate - long_name: "Wet day threshold" - data: 1 - units: "mm day-1" - condition: - kind: operator - operator: ">" - reducer: - kind: reducer - reducer: mean - ET: - short_name: "sdii" - long_name: "Simple precipitation intensity index" - definition: "PRCPTOT / Nwetdays" - comment: "mean daily total precipitation during days having at least 1 mm" - - r{PRC}pctl: - reference: CLIPC - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "r{PRC}pctl" - standard_name: lwe_thickness_of_precipitation_amount - long_name: "{PRC}th percentile of precipitation during wet days (Precip >= 1mm)" - units: "mm" - cell_methods: - input: - data: pr - index_function: - name: thresholded_percentile - parameters: - threshold: - kind: quantity - standard_name: lwe_precipitation_rate - long_name: "Wet day threshold" - data: 1 - units: "mm day-1" - condition: - kind: operator - operator: ">" - percentiles: - kind: quantity - standard_name: - proposed_standard_name: quantile - data: {PRC} - units: "%" - ET: - short_name: - long_name: - definition: - comment: - - rx1day: - reference: ECAD - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "rx1day" - standard_name: lwe_thickness_of_precipitation_amount - long_name: "Maximum 1-day precipitation" - units: "mm" - cell_methods: - - time: sum within days - - time: maximum over days - input: - data: pr - index_function: - name: thresholded_statistics - parameters: - threshold: - kind: quantity - standard_name: lwe_precipitation_rate - long_name: "Wet day threshold" - data: 1 - units: "mm day-1" - condition: - kind: operator - operator: ">" - reducer: - kind: reducer - reducer: max - ET: - short_name: "rx1day" - long_name: "Monthly maximum 1-day precipitation" - definition: "Maximum one-day precipitation" - comment: - - rx5day: - reference: ECAD - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "rx5day" - standard_name: lwe_thickness_of_precipitation_amount - long_name: "Maximum 5-day precipitation" - units: "mm" - cell_methods: - input: - data: pr - index_function: - name: thresholded_running_statistics - parameters: - threshold: - kind: quantity - standard_name: lwe_precipitation_rate - long_name: "Wet day threshold" - data: 1 - units: "mm day-1" - condition: - kind: operator - operator: ">" - rolling_aggregator: - kind: reducer - reducer: sum - window_size: - kind: quantity - standard_name: - proposed_standard_name: temporal_window_size - data: 5 - units: "day" - reducer: - kind: reducer - reducer: max - ET: - short_name: "rx5day" - long_name: "Monthly maximum 5-day precipitation" - definition: "Maximum consecutive five-day precipitation" - comment: - - rx{ND}day: - reference: ET-SCI - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "rx{ND}day" - standard_name: lwe_thickness_of_precipitation_amount - long_name: "Maximum {ND}-day precipitation" - units: "mm" - cell_methods: - input: - data: pr - index_function: - name: thresholded_running_statistics - parameters: - threshold: - kind: quantity - standard_name: lwe_precipitation_rate - long_name: "Wet day threshold" - data: 1 - units: "mm day-1" - condition: - kind: operator - operator: ">" - rolling_aggregator: - kind: reducer - 
reducer: sum - window_size: - kind: quantity - standard_name: - proposed_standard_name: temporal_window_size - data: {ND} - units: "day" - reducer: - kind: reducer - reducer: max - ET: - short_name: "rxnday" - long_name: "User-defined consecutive days precipitation amount" - definition: "Maximum consecutive n-day precipitation" - comment: - - rh: - reference: ECA&D - period: - allowed: - annual: - seasonal: - monthly: - default: monthly - output: - var_name: "rh" - standard_name: relative_humidity - long_name: "Mean of daily relative humidity" - units: "%" - cell_methods: - - time: mean - input: - data: hurs - index_function: - name: statistics - parameters: - reducer: - kind: reducer - reducer: mean - ET: - short_name: - long_name: - definition: - comment: - - rr: - reference: ECA&D - period: - allowed: - annual: - seasonal: - monthly: - default: monthly - output: - var_name: "rr" - standard_name: lwe_thickness_of_precipitation_amount - long_name: "Precipitation sum" - units: "mm" - cell_methods: - - time: mean within days - - time: mean over days - input: - data: pr - index_function: - name: thresholded_statistics - parameters: - threshold: - kind: quantity - standard_name: lwe_precipitation_rate - long_name: "Wet day threshold" - data: 1 - units: "mm day-1" - condition: - kind: operator - operator: ">" - reducer: - kind: reducer - reducer: sum - ET: - short_name: - long_name: - definition: - comment: - - pp: - reference: ECA&D - period: - allowed: - annual: - seasonal: - monthly: - default: monthly - output: - var_name: "pp" - standard_name: air_pressure_at_sea_level - long_name: "Mean of daily sea level pressure" - units: "hPa" - cell_methods: - - time: mean - input: - data: psl - index_function: - name: statistics - parameters: - reducer: - kind: reducer - reducer: mean - ET: - short_name: - long_name: - definition: - comment: - - tg: - reference: ECA&D - period: - allowed: - annual: - seasonal: - monthly: - default: monthly - output: - var_name: "tg" - standard_name: air_temperature - long_name: "Mean of daily mean temperature" - units: "degree_Celsius" - cell_methods: - - time: mean - input: - data: tas - index_function: - name: statistics - parameters: - reducer: - kind: reducer - reducer: mean - ET: - short_name: - long_name: - definition: - comment: - - tn: - reference: ECA&D - period: - allowed: - annual: - seasonal: - monthly: - default: monthly - output: - var_name: "tn" - standard_name: air_temperature - long_name: "Mean of daily minimum temperature" - units: "degree_Celsius" - cell_methods: - - time: mean - input: - data: tasmin - index_function: - name: statistics - parameters: - reducer: - kind: reducer - reducer: mean - ET: - short_name: - long_name: - definition: - comment: - - tx: - reference: ECA&D - period: - allowed: - annual: - seasonal: - monthly: - default: monthly - output: - var_name: "tx" - standard_name: air_temperature - long_name: "Mean of daily maximum temperature" - units: "degree_Celsius" - cell_methods: - - time: mean - input: - data: tasmax - index_function: - name: statistics - parameters: - reducer: - kind: reducer - reducer: mean - ET: - short_name: - long_name: - definition: - comment: - - sd: - reference: ECA&D - period: - allowed: - annual: - seasonal: - monthly: - default: monthly - output: - var_name: "sd" - standard_name: surface_snow_thickness - long_name: "Mean of daily snow depth" - units: "cm" - cell_methods: - - time: mean - input: - data: snd - index_function: - name: statistics - parameters: - reducer: - kind: reducer - reducer: mean - 
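
rx5day and rx{ND}day chain a rolling aggregation and a reducer: the ND-day running precipitation sum, then its maximum per period. The plain statistics indices that follow (rh, rr, pp, tg, tn, tx, sd, fg) are simple resample-and-reduce one-liners by comparison. A sketch of the rolling case:

    import xarray as xr

    def max_nday_precip(pr: xr.DataArray, nd: int, freq: str = "YS") -> xr.DataArray:
        return pr.rolling(time=nd).sum().resample(time=freq).max()  # nd=5 gives rx5day
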
ET: - short_name: - long_name: - definition: - comment: - - sd1: - reference: ECA&D - period: - allowed: - annual: - seasonal: - monthly: - default: monthly - output: - var_name: "sd1" - standard_name: number_of_days_with_surface_snow_thickness_above_threshold - proposed_standard_name: number_of_occurrences_with_surface_snow_thickness_at_or_above_threshold - long_name: "Snow days (SD >= 1 cm)" - units: "1" - cell_methods: - - time: mean within days - - time: sum over days - input: - data: snd - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: surface_snow_thickness - data: 1 - units: "cm" - condition: - kind: operator - operator: ">" - ET: - short_name: - long_name: - definition: - comment: - - sd5cm: - reference: ECA&D - period: - allowed: - annual: - seasonal: - monthly: - default: monthly - output: - var_name: "sd5cm" - standard_name: number_of_days_with_surface_snow_thickness_above_threshold - proposed_standard_name: number_of_occurrences_with_surface_snow_thickness_at_or_above_threshold - long_name: "Number of days with snow depth >= 5 cm" - units: "1" - cell_methods: - - time: mean within days - - time: sum over days - input: - data: snd - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: surface_snow_thickness - data: 5 - units: "cm" - condition: - kind: operator - operator: ">" - ET: - short_name: - long_name: - definition: - comment: - - sd50cm: - reference: ECA&D - period: - allowed: - annual: - seasonal: - monthly: - default: monthly - output: - var_name: "sd50cm" - standard_name: number_of_days_with_surface_snow_thickness_above_threshold - proposed_standard_name: number_of_occurrences_with_surface_snow_thickness_at_or_above_threshold - long_name: "Number of days with snow depth >= 50 cm" - units: "1" - cell_methods: - - time: mean within days - - time: sum over days - input: - data: snd - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: surface_snow_thickness - data: 50 - units: "cm" - condition: - kind: operator - operator: ">" - ET: - short_name: - long_name: - definition: - comment: - - sd{D}cm: - reference: ECA&D - period: - allowed: - annual: - seasonal: - monthly: - default: monthly - output: - var_name: "sd{D}cm" - standard_name: number_of_days_with_surface_snow_thickness_above_threshold - proposed_standard_name: number_of_occurrences_with_surface_snow_thickness_at_or_above_threshold - long_name: "Number of days with snow depth >= {D} cm" - units: "1" - cell_methods: - - time: mean within days - - time: sum over days - input: - data: snd - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: surface_snow_thickness - data: {D} - units: "cm" - condition: - kind: operator - operator: ">" - ET: - short_name: - long_name: - definition: - comment: - - ss: - reference: ECA&D - period: - allowed: - annual: - seasonal: - monthly: - default: monthly - output: - var_name: "ss" - standard_name: duration_of_sunshine - long_name: "Sunshine duration, sum" - units: "hour" - cell_methods: - input: - data: sund - index_function: - name: statistics - parameters: - reducer: - kind: reducer - reducer: sum - ET: - short_name: - long_name: - definition: - comment: - - fxx: - reference: ECA&D - period: - allowed: - annual: - seasonal: - monthly: - default: monthly - output: - var_name: "fxx" - standard_name: wind_speed_of_gust - long_name: "Maximum value of daily maximum wind gust 
strength" - units: "meter second-1" - cell_methods: - - time: maximum - input: - data: wsgsmax - index_function: - name: statistics - parameters: - reducer: - kind: reducer - reducer: max - ET: - short_name: - long_name: - definition: - comment: - - fg6bft: - reference: ECA&D - period: - allowed: - annual: - seasonal: - monthly: - default: monthly - output: - var_name: "fg6bft" - standard_name: number_of_days_with_wind_speed_above_threshold - proposed_standard_name: number_of_occurrences_with_wind_speed_at_or_above_threshold - long_name: "Days with daily averaged wind strength >= 6 Bft (>=10.8 m/s)" - units: "1" - cell_methods: - - time: mean within days - - time: sum over days - input: - data: sfcWind - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: wind_speed - data: 10.8 - units: "meter second-1" - condition: - kind: operator - operator: ">" - ET: - short_name: - long_name: - definition: - comment: - - fgcalm: - reference: ECA&D - period: - allowed: - annual: - seasonal: - monthly: - default: monthly - output: - var_name: "fgcalm" - standard_name: number_of_days_with_wind_speed_below_threshold - proposed_standard_name: number_of_occurrences_with_wind_speed_at_or_below_threshold - long_name: "Calm days (daily mean wind strength <= 2 m/s)" - units: "1" - cell_methods: - - time: mean within days - - time: sum over days - input: - data: sfcWind - index_function: - name: count_occurrences - parameters: - threshold: - kind: quantity - standard_name: wind_speed - data: 2 - units: "meter second-1" - condition: - kind: operator - operator: "<" - ET: - short_name: - long_name: - definition: - comment: - - fg: - reference: ECA&D - period: - allowed: - annual: - seasonal: - monthly: - default: monthly - output: - var_name: "fg" - standard_name: wind_speed - long_name: "Mean of daily mean wind strength" - units: "meter second-1" - cell_methods: - - time: mean - input: - data: sfcWind - index_function: - name: statistics - parameters: - reducer: - kind: reducer - reducer: mean - ET: - short_name: - long_name: - definition: - comment: - - nzero: - reference: SMHI - period: - allowed: - annual: - seasonal: - monthly: - default: annual - output: - var_name: "nzero" - standard_name: - proposed_standard_name: number_of_occurrences_with_air_temperature_level_crossings - long_name: "Number of zero-crossing days (days when Tmin < 0 degC < Tmax)" - units: "1" - cell_methods: - - time: sum over days - input: - low_data: tasmin - high_data: tasmax - index_function: - name: count_level_crossings - parameters: - threshold: - kind: quantity - standard_name: air_temperature - long_name: "Level crossing value for daily air temperature" - data: 0 - units: "degree_Celsius" - ET: - short_name: - long_name: - definition: - comment: - -# -- icclim additions -- - - wsdi: - reference: ECAD - output: - var_name: "wsdi" - standard_name: - long_name: "Warm-spell duration index (days)" - units: "1" - cell_methods: - input: - data: tasmax - ET: - short_name: "wsdi" - long_name: - definition: "Number of days per period where, in intervals of at least 6 consecutive days TX > 90th percentile" - comment: - - csdi: - reference: ECAD - output: - var_name: "csdi" - standard_name: - long_name: "Cold-spell duration index (days)" - units: "1" - cell_methods: - input: - data: tasmin - ET: - short_name: "csdi" - long_name: - definition: "Number of days per period where, in intervals of at least 6 consecutive days TN < 10th percentile" - comment: - - r75p: - reference: ECAD - output: - 
var_name: "r75p" - standard_name: - long_name: "Days with RR > 75th percentile of daily amounts (moderate wet days) (days)" - units: "1" - cell_methods: - input: - data: pr - - r95p: - reference: ECAD - output: - var_name: "r95p" - standard_name: - long_name: "Days with RR > 95th percentile of daily amounts (very wet days) (days)" - units: "1" - cell_methods: - input: - data: pr - - r99p: - reference: ECAD - output: - var_name: "r99p" - standard_name: - long_name: "Days with RR > 99th percentile of daily amounts (extremely wet days) (days)" - units: "1" - cell_methods: - input: - data: pr - - r75pTOT: - reference: ECAD - output: - var_name: "r75pTOT" - standard_name: - long_name: "Precipitation fraction due to moderate wet days (> 75th percentile)" - units: "%" - cell_methods: - input: - data: pr - - r95pTOT: - reference: ECAD - output: - var_name: "r95pTOT" - standard_name: - long_name: "Precipitation fraction due to very wet days (> 95th percentile)" - units: "%" - cell_methods: - input: - data: pr - - r99pTOT: - reference: ECAD - output: - var_name: "r99pTOT" - standard_name: - long_name: "Precipitation fraction due to extremely wet days (> 99th percentile)" - units: "%" - cell_methods: - input: - data: pr - - CD: - reference: ECAD - output: - var_name: "cd" - standard_name: - long_name: "Days with TG < 25th percentile of daily mean temperature\n\ \ \ \ \ - and RR <25th percentile of daily precipitation sum (cold/dry days)" - units: "1" - cell_methods: - input: - data: - - pr - - tas - - CW: - reference: ECAD - output: - var_name: "cw" - standard_name: - long_name: "Days with TG < 25th percentile of daily mean temperature\n\ \ \ \ \ - and RR >75th percentile of daily precipitation sum (cold/wet days)" - units: "1" - cell_methods: - input: - data: - - pr - - tas - - WD: - reference: ECAD - output: - var_name: "wd" - standard_name: - long_name: "Days with TG > 75th percentile of daily mean temperature\n\ \ \ \ \ - and RR <25th percentile of daily precipitation sum (warm/dry days)" - units: "1" - cell_methods: - input: - data: - - pr - - tas - - WW: - reference: ECAD - output: - var_name: "ww" - standard_name: - long_name: "Days with TG > 75th percentile of daily mean temperature\n\ \ \ \ \ - and RR >75th percentile of daily precipitation sum (warm/wet days)" - units: "1" - cell_methods: - input: - data: - - pr - - tas diff --git a/icclim/ecad/ecad_functions.py b/icclim/ecad/ecad_functions.py deleted file mode 100644 index 0a7deebb..00000000 --- a/icclim/ecad/ecad_functions.py +++ /dev/null @@ -1,849 +0,0 @@ -"""All ECA&D functions. Each function wraps its xclim equivalent functions adding icclim -metadata to it. 
-""" -from __future__ import annotations - -import re -from typing import Callable -from warnings import warn - -import numpy as np -import xarray as xr -import xclim.core.utils -from xarray import DataArray -from xclim import atmos, land -from xclim.core.calendar import percentile_doy, resample_doy -from xclim.core.units import convert_units_to -from xclim.core.utils import PercentileDataArray - -from icclim.models.cf_calendar import CfCalendar -from icclim.models.cf_variable import CfVariable -from icclim.models.constants import IN_BASE_IDENTIFIER, PERCENTILES_COORD -from icclim.models.frequency import Frequency -from icclim.models.index_config import IndexConfig -from icclim.models.quantile_interpolation import QuantileInterpolation - - -def gd4(config: IndexConfig) -> DataArray: - return _compute_threshold_index( - da=config.tas.study_da, - threshold=4.0 if config.threshold is None else config.threshold, - freq=config.frequency, - xclim_index_fun=atmos.growing_degree_days, - ) - - -def cfd(config: IndexConfig) -> DataArray: - return _compute_threshold_index( - da=config.tasmin.study_da, - threshold=0.0 if config.threshold is None else config.threshold, - freq=config.frequency, - xclim_index_fun=atmos.consecutive_frost_days, - ) - - -def fd(config: IndexConfig) -> DataArray: - return _compute_threshold_index( - da=config.tasmin.study_da, - threshold=0.0 if config.threshold is None else config.threshold, - freq=config.frequency, - xclim_index_fun=atmos.frost_days, - ) - - -def hd17(config: IndexConfig) -> DataArray: - return _compute_threshold_index( - da=config.tas.study_da, - threshold=17.0 if config.threshold is None else config.threshold, - freq=config.frequency, - xclim_index_fun=atmos.heating_degree_days, - ) - - -def id(config: IndexConfig) -> DataArray: - return _compute_threshold_index( - da=config.tasmax.study_da, - threshold=0.0 if config.threshold is None else config.threshold, - freq=config.frequency, - xclim_index_fun=atmos.ice_days, - ) - - -def csdi(config: IndexConfig) -> tuple[DataArray, DataArray | None]: - thresh = 10 if config.threshold is None else config.threshold - return _compute_spell_duration( - cf_var=config.tasmin, - freq=config.frequency, - per_thresh=thresh, - per_window=config.window, - per_interpolation=config.interpolation, - min_spell_duration=6, - save_percentile=config.save_percentile, - callback=config.callback, - xclim_index_fun=atmos.cold_spell_duration_index, - ) - - -def tg10p(config: IndexConfig) -> tuple[DataArray, DataArray | None]: - thresh = 10 if config.threshold is None else config.threshold - return _compute_temperature_percentile_index( - cf_var=config.tas, - freq=config.frequency, - tas_per_thresh=thresh, - per_window=config.window, - per_interpolation=config.interpolation, - save_percentile=config.save_percentile, - is_percent=config.is_percent, - callback=config.callback, - xclim_index_fun=atmos.tg10p, - ) - - -def tn10p(config: IndexConfig) -> tuple[DataArray, DataArray | None]: - thresh = 10 if config.threshold is None else config.threshold - return _compute_temperature_percentile_index( - cf_var=config.tasmin, - freq=config.frequency, - tas_per_thresh=thresh, - per_window=config.window, - per_interpolation=config.interpolation, - save_percentile=config.save_percentile, - is_percent=config.is_percent, - callback=config.callback, - xclim_index_fun=atmos.tn10p, - ) - - -def tx10p(config: IndexConfig) -> tuple[DataArray, DataArray | None]: - thresh = 10 if config.threshold is None else config.threshold - return 
_compute_temperature_percentile_index( - cf_var=config.tasmax, - freq=config.frequency, - tas_per_thresh=thresh, - per_window=config.window, - per_interpolation=config.interpolation, - save_percentile=config.save_percentile, - is_percent=config.is_percent, - callback=config.callback, - xclim_index_fun=atmos.tx10p, - ) - - -def txn(config: IndexConfig) -> DataArray: - result = atmos.tx_min( - config.tasmax.study_da, **config.frequency.build_frequency_kwargs() - ) - result = convert_units_to(result, "degree_Celsius") - return result - - -def tnn(config: IndexConfig) -> DataArray: - result = atmos.tn_min( - config.tasmin.study_da, **config.frequency.build_frequency_kwargs() - ) - result = convert_units_to(result, "degree_Celsius") - return result - - -def cdd(config: IndexConfig) -> DataArray: - result = atmos.maximum_consecutive_dry_days( - config.pr.study_da, - thresh="1.0 mm/day", - **config.frequency.build_frequency_kwargs(), - ) - return result - - -def su(config: IndexConfig) -> DataArray: - return _compute_threshold_index( - da=config.tasmax.study_da, - threshold=25.0 if config.threshold is None else config.threshold, - freq=config.frequency, - xclim_index_fun=atmos.tx_days_above, - ) - - -def tr(config: IndexConfig) -> DataArray: - return _compute_threshold_index( - da=config.tasmin.study_da, - threshold=20.0 if config.threshold is None else config.threshold, - freq=config.frequency, - xclim_index_fun=atmos.tropical_nights, - ) - - -def wsdi(config: IndexConfig) -> tuple[DataArray, DataArray | None]: - thresh = 90 if config.threshold is None else config.threshold - return _compute_spell_duration( - cf_var=config.tasmax, - freq=config.frequency, - per_thresh=thresh, - per_window=config.window, - per_interpolation=config.interpolation, - min_spell_duration=6, - save_percentile=config.save_percentile, - callback=config.callback, - xclim_index_fun=atmos.warm_spell_duration_index, - ) - - -def tg90p(config: IndexConfig) -> tuple[DataArray, DataArray | None]: - thresh = 90 if config.threshold is None else config.threshold - return _compute_temperature_percentile_index( - cf_var=config.tas, - freq=config.frequency, - tas_per_thresh=thresh, - per_window=config.window, - per_interpolation=config.interpolation, - save_percentile=config.save_percentile, - is_percent=config.is_percent, - callback=config.callback, - xclim_index_fun=atmos.tg90p, - ) - - -def tn90p(config: IndexConfig) -> tuple[DataArray, DataArray | None]: - thresh = 90 if config.threshold is None else config.threshold - return _compute_temperature_percentile_index( - cf_var=config.tasmin, - freq=config.frequency, - tas_per_thresh=thresh, - per_window=config.window, - per_interpolation=config.interpolation, - save_percentile=config.save_percentile, - is_percent=config.is_percent, - callback=config.callback, - xclim_index_fun=atmos.tn90p, - ) - - -def tx90p(config: IndexConfig) -> tuple[DataArray, DataArray | None]: - thresh = 90 if config.threshold is None else config.threshold - return _compute_temperature_percentile_index( - cf_var=config.tasmax, - freq=config.frequency, - tas_per_thresh=thresh, - per_window=config.window, - per_interpolation=config.interpolation, - save_percentile=config.save_percentile, - is_percent=config.is_percent, - callback=config.callback, - xclim_index_fun=atmos.tx90p, - ) - - -def txx(config: IndexConfig) -> DataArray: - result = atmos.tx_max( - config.tasmax.study_da, **config.frequency.build_frequency_kwargs() - ) - result = convert_units_to(result, "degree_Celsius") - return result - - -def 
tnx(config: IndexConfig) -> DataArray: - result = atmos.tn_max( - config.tasmin.study_da, **config.frequency.build_frequency_kwargs() - ) - result = convert_units_to(result, "degree_Celsius") - return result - - -def csu(config: IndexConfig) -> DataArray: - return _compute_threshold_index( - da=config.tasmax.study_da, - threshold=25.0 if config.threshold is None else config.threshold, - freq=config.frequency, - xclim_index_fun=atmos.maximum_consecutive_warm_days, - ) - - -def prcptot(config: IndexConfig) -> DataArray: - result = atmos.precip_accumulation( - _filter_in_wet_days(config.pr.study_da, dry_day_value=0), - **config.frequency.build_frequency_kwargs(), - ) - return result - - -def rr1(config: IndexConfig) -> DataArray: - result = atmos.wetdays( - config.pr.study_da, - thresh="1.0 mm/day", - **config.frequency.build_frequency_kwargs(), - ) - return result - - -def sdii(config: IndexConfig) -> DataArray: - result = atmos.daily_pr_intensity( - config.pr.study_da, - thresh="1.0 mm/day", - **config.frequency.build_frequency_kwargs(), - ) - return result - - -def cwd(config: IndexConfig) -> DataArray: - result = atmos.maximum_consecutive_wet_days( - config.pr.study_da, - thresh="1.0 mm/day", - **config.frequency.build_frequency_kwargs(), - ) - return result - - -def r10mm(config: IndexConfig) -> DataArray: - result = atmos.wetdays( - config.pr.study_da, - thresh="10 mm/day", - **config.frequency.build_frequency_kwargs(), - ) - return result - - -def r20mm(config: IndexConfig) -> DataArray: - result = atmos.wetdays( - config.pr.study_da, - thresh="20 mm/day", - **config.frequency.build_frequency_kwargs(), - ) - return result - - -def rx1day(config: IndexConfig) -> DataArray: - result = atmos.max_1day_precipitation_amount( - config.pr.study_da, **config.frequency.build_frequency_kwargs() - ) - return result - - -def rx5day(config: IndexConfig) -> DataArray: - result = atmos.max_n_day_precipitation_amount( - config.pr.study_da, window=5, **config.frequency.build_frequency_kwargs() - ) - return result - - -def r75p(config: IndexConfig) -> tuple[DataArray, DataArray | None]: - return _compute_rxxp( - pr=config.pr, - freq=config.frequency, - pr_per_thresh=75.0, - per_interpolation=config.interpolation, - save_percentile=config.save_percentile, - is_percent=config.is_percent, - ) - - -def r75ptot(config: IndexConfig) -> tuple[DataArray, DataArray | None]: - return _compute_rxxptot( - pr=config.pr, - freq=config.frequency, - pr_per_thresh=75.0, - per_interpolation=config.interpolation, - save_percentile=config.save_percentile, - ) - - -def r95p(config: IndexConfig) -> tuple[DataArray, DataArray | None]: - return _compute_rxxp( - pr=config.pr, - freq=config.frequency, - pr_per_thresh=95.0, - per_interpolation=config.interpolation, - save_percentile=config.save_percentile, - is_percent=config.is_percent, - ) - - -def r95ptot(config: IndexConfig) -> tuple[DataArray, DataArray | None]: - return _compute_rxxptot( - pr=config.pr, - freq=config.frequency, - pr_per_thresh=95.0, - per_interpolation=config.interpolation, - save_percentile=config.save_percentile, - ) - - -def r99p(config: IndexConfig) -> tuple[DataArray, DataArray | None]: - return _compute_rxxp( - pr=config.pr, - freq=config.frequency, - pr_per_thresh=99.0, - per_interpolation=config.interpolation, - save_percentile=config.save_percentile, - is_percent=config.is_percent, - ) - - -def r99ptot(config: IndexConfig) -> tuple[DataArray, DataArray | None]: - return _compute_rxxptot( - pr=config.pr, - freq=config.frequency, - pr_per_thresh=99.0, 
- per_interpolation=config.interpolation, - save_percentile=config.save_percentile, - ) - - -def sd(config: IndexConfig) -> DataArray: - result = land.snow_depth( - config.pr.study_da, **config.frequency.build_frequency_kwargs() - ) - return result - - -def sd1(config: IndexConfig) -> DataArray: - result = land.snow_cover_duration( - config.pr.study_da, thresh="1 cm", **config.frequency.build_frequency_kwargs() - ) - return result - - -def sd5cm(config: IndexConfig) -> DataArray: - result = land.snow_cover_duration( - config.pr.study_da, thresh="5 cm", **config.frequency.build_frequency_kwargs() - ) - return result - - -def sd50cm(config: IndexConfig) -> DataArray: - result = land.snow_cover_duration( - config.pr.study_da, thresh="50 cm", **config.frequency.build_frequency_kwargs() - ) - return result - - -def tg(config: IndexConfig) -> DataArray: - result = atmos.tg_mean( - config.tas.study_da, **config.frequency.build_frequency_kwargs() - ) - result = convert_units_to(result, "degree_Celsius") - return result - - -def tn(config: IndexConfig) -> DataArray: - result = atmos.tn_mean( - config.tasmin.study_da, **config.frequency.build_frequency_kwargs() - ) - result = convert_units_to(result, "degree_Celsius") - return result - - -def tx(config: IndexConfig) -> DataArray: - result = atmos.tx_mean( - config.tasmax.study_da, **config.frequency.build_frequency_kwargs() - ) - result = convert_units_to(result, "degree_Celsius") - return result - - -def dtr(config: IndexConfig) -> DataArray: - result = atmos.daily_temperature_range( - tasmax=config.tasmax.study_da, - tasmin=config.tasmin.study_da, - **config.frequency.build_frequency_kwargs(), - ) - result.attrs["units"] = "degree_Celsius" - return result - - -def etr(config: IndexConfig) -> DataArray: - result = atmos.extreme_temperature_range( - tasmax=config.tasmax.study_da, - tasmin=config.tasmin.study_da, - **config.frequency.build_frequency_kwargs(), - ) - result.attrs["units"] = "degree_Celsius" - return result - - -def vdtr(config: IndexConfig) -> DataArray: - result = atmos.daily_temperature_range_variability( - tasmax=config.tasmax.study_da, - tasmin=config.tasmin.study_da, - **config.frequency.build_frequency_kwargs(), - ) - result.attrs["units"] = "degree_Celsius" - return result - - -def cd(config: IndexConfig) -> DataArray: - return compute_compound_index( - tas=config.tas, - pr=config.pr, - freq=config.frequency, - tas_per_thresh=25, - pr_per_thresh=25, - per_window=config.window, - per_interpolation=config.interpolation, - save_percentile=config.save_percentile, - callback=config.callback, - xclim_index_fun=atmos.cold_and_dry_days, - ) - - -def cw(config: IndexConfig) -> DataArray: - return compute_compound_index( - tas=config.tas, - pr=config.pr, - freq=config.frequency, - tas_per_thresh=25, - pr_per_thresh=75, - per_window=config.window, - per_interpolation=config.interpolation, - save_percentile=config.save_percentile, - callback=config.callback, - xclim_index_fun=atmos.cold_and_wet_days, - ) - - -def wd(config: IndexConfig) -> DataArray: - return compute_compound_index( - tas=config.tas, - pr=config.pr, - freq=config.frequency, - tas_per_thresh=75, - pr_per_thresh=25, - per_window=config.window, - per_interpolation=config.interpolation, - save_percentile=config.save_percentile, - callback=config.callback, - xclim_index_fun=atmos.warm_and_dry_days, - ) - - -def ww(config: IndexConfig) -> DataArray: - return compute_compound_index( - tas=config.tas, - pr=config.pr, - freq=config.frequency, - tas_per_thresh=75, - pr_per_thresh=75, 
- per_window=config.window, - per_interpolation=config.interpolation, - save_percentile=config.save_percentile, - callback=config.callback, - xclim_index_fun=atmos.warm_and_wet_days, - ) - - -def _can_run_bootstrap(cf_var: CfVariable) -> bool: - """Avoid bootstrapping if there is one single year overlapping - or no year overlapping or all year overlapping. - """ - study_years = np.unique(cf_var.study_da.indexes.get("time").year) - overlapping_years = np.unique( - cf_var.study_da.sel(time=_get_ref_period_slice(cf_var.reference_da)) - .indexes.get("time") - .year - ) - return 1 < len(overlapping_years) < len(study_years) - - -def _get_ref_period_slice(da: DataArray) -> slice: - time_length = len(da.time) - return slice(*da.time[0 :: time_length - 1].dt.strftime("%Y-%m-%d").values) - - -def _to_percent(da: DataArray, sampling_freq: Frequency) -> DataArray: - if sampling_freq == Frequency.MONTH: - da = da / da.time.dt.daysinmonth * 100 - elif sampling_freq == Frequency.YEAR: - coef = xr.full_like(da, 1) - leap_years = _is_leap_year(da) - coef[{"time": leap_years}] = 366 - coef[{"time": ~leap_years}] = 365 - da = da / coef - elif sampling_freq == Frequency.AMJJAS: - da = da / 183 - elif sampling_freq == Frequency.ONDJFM: - coef = xr.full_like(da, 1) - leap_years = _is_leap_year(da) - coef[{"time": leap_years}] = 183 - coef[{"time": ~leap_years}] = 182 - da = da / coef - elif sampling_freq == Frequency.DJF: - coef = xr.full_like(da, 1) - leap_years = _is_leap_year(da) - coef[{"time": leap_years}] = 91 - coef[{"time": ~leap_years}] = 90 - da = da / coef - elif sampling_freq in [Frequency.MAM, Frequency.JJA]: - da = da / 92 - elif sampling_freq == Frequency.SON: - da = da / 91 - else: - # TODO improve this for custom resampling - warn( - "For now, '%' unit can only be used when `slice_mode` is one of: " - "{MONTH, YEAR, AMJJAS, ONDJFM, DJF, MAM, JJA, SON}." 
- ) - return da - da.attrs["units"] = "1" - return da - - -def _is_leap_year(da: DataArray) -> np.ndarray: - time_index = da.indexes.get("time") - if isinstance(time_index, xr.CFTimeIndex): - return CfCalendar.lookup(time_index.calendar).is_leap(da.time.dt.year) - else: - return da.time.dt.is_leap_year - - -def _add_bootstrap_meta(result: DataArray, per: DataArray) -> DataArray: - result.attrs[IN_BASE_IDENTIFIER] = per.climatology_bounds - return result - - -def _compute_percentile_doy( - cf_var: CfVariable, - percentile: float, - window: int = 5, - interpolation=QuantileInterpolation.MEDIAN_UNBIASED, - callback: Callable = None, -) -> (DataArray, bool): - if PercentileDataArray.is_compatible(cf_var.reference_da): - per = cf_var.reference_da - run_bootstrap = False - else: - per = percentile_doy( - cf_var.reference_da, - window, - percentile, - alpha=interpolation.alpha, - beta=interpolation.beta, - ).compute() # dask "optimization" - run_bootstrap = _can_run_bootstrap(cf_var) - if callback is not None: - callback(50) - return per, run_bootstrap - - -def _compute_precip_percentile_over_period( - cf_var: CfVariable, interpolation: QuantileInterpolation, percentiles: float -) -> DataArray: - if PercentileDataArray.is_compatible(cf_var.reference_da): - return cf_var.reference_da - else: - base_wet_days = _filter_in_wet_days(cf_var.reference_da, dry_day_value=np.nan) - return xr.apply_ufunc( - xclim.core.utils.calc_perc, - base_wet_days, - input_core_dims=[["time"]], - output_core_dims=[[PERCENTILES_COORD]], - kwargs=dict( - percentiles=[percentiles], - alpha=interpolation.alpha, - beta=interpolation.beta, - ), - dask="parallelized", - output_dtypes=[base_wet_days.dtype], - dask_gufunc_kwargs=dict(output_sizes={PERCENTILES_COORD: len([percentiles])}), - ) - - -def _filter_in_wet_days(da: DataArray, dry_day_value: float): - """Turns non wet days to NaN. dry_day_value should be NaN or 0.""" - precip = convert_units_to(da, "mm/d") - return precip.where(precip > 1, dry_day_value) - - -def _compute_threshold_index( - da: DataArray, - threshold: float, - freq: Frequency, - xclim_index_fun: Callable, -) -> DataArray: - result = xclim_index_fun( - da, thresh=f"{threshold} degree_Celsius", **freq.build_frequency_kwargs() - ) - return result - - -def _compute_spell_duration( - cf_var: CfVariable, - freq: Frequency, - per_window: int, - per_thresh: float, - per_interpolation: QuantileInterpolation, - min_spell_duration: int, - save_percentile: bool, - callback: Callable, - xclim_index_fun: Callable, -) -> tuple[DataArray, DataArray | None]: - per, run_bootstrap = _compute_percentile_doy( - cf_var, - per_thresh, - per_window, - per_interpolation, - callback, - ) - result = xclim_index_fun( - cf_var.study_da, - per, - window=min_spell_duration, - **freq.build_frequency_kwargs(), - bootstrap=run_bootstrap, - ) - result = result.squeeze(PERCENTILES_COORD, drop=True) - if run_bootstrap: - result = _add_bootstrap_meta(result, per) - if save_percentile: - return result, per - result.attrs["description"] = re.sub( - r"\s\w+th\spercentile", - f" {per_thresh}th percentile", - result.attrs.get("description"), - ) - return result, None - - -def compute_compound_index( - tas: CfVariable, - pr: CfVariable, - freq: Frequency, - tas_per_thresh: int, - pr_per_thresh: int, - per_window: int, - per_interpolation: QuantileInterpolation, - save_percentile: bool, - callback: Callable, - xclim_index_fun: Callable, -) -> DataArray: - """CD, CW, WD, WW - - Parameters - ---------- - tas : CfVariable - CfVariable of tas variable. 
- pr : CfVariable - DataArray of pr variable. - freq : Frequency - Sampling frequency of the output. - save_percentile : bool - Flag to include coordinate variable including the computed percentiles. - Does not contain the bootstrapped percentiles. - Default is False. - callback : Callable - callback receiving an integer, may serve as a loading bar. - - Returns - ------- - If save_percentile is True, returns a Tuple of index_result, - computed_percentiles. Otherwise, returns the index_result - """ - tas_per, _ = _compute_percentile_doy( - tas, - tas_per_thresh, - per_window, - per_interpolation, - callback, - ) - tas_per = tas_per.squeeze(PERCENTILES_COORD, drop=True) - pr.reference_da = _filter_in_wet_days(pr.reference_da, dry_day_value=np.NAN) - pr.study_da = _filter_in_wet_days(pr.study_da, dry_day_value=0) - pr_per, _ = _compute_percentile_doy( - pr, - pr_per_thresh, - per_window, - per_interpolation, - callback, - ) - pr_per = pr_per.squeeze(PERCENTILES_COORD, drop=True) - result = xclim_index_fun( - tas=tas.study_da, - pr=pr.study_da, - tas_per=tas_per, - pr_per=pr_per, - **freq.build_frequency_kwargs(), - ) - if save_percentile: - # FIXME, not consistent with other percentile based indices - # We should probably return a Tuple (res, [tas_per, pr_per]) - # However, here the percentiles use the existing time dimension and not - # doy - result.coords["tas_per"] = resample_doy(tas_per, result) - result.coords["pr_per"] = resample_doy(pr_per, result) - return result - - -def _compute_rxxptot( - pr: CfVariable, - freq: Frequency, - pr_per_thresh: float, - per_interpolation: QuantileInterpolation, - save_percentile: bool, -) -> tuple[DataArray, DataArray | None]: - per = _compute_precip_percentile_over_period(pr, per_interpolation, pr_per_thresh) - result = atmos.fraction_over_precip_thresh( - pr.study_da, - per, - thresh="1 mm/day", - **freq.build_frequency_kwargs(), - bootstrap=False, - ).squeeze(PERCENTILES_COORD, drop=True) - result = result * 100 - result.attrs["units"] = "%" - if save_percentile: - return result, per - return result, None - - -def _compute_rxxp( - pr: CfVariable, - freq: Frequency, - pr_per_thresh: float, - per_interpolation: QuantileInterpolation, - save_percentile: bool, - is_percent: bool, -) -> tuple[DataArray, DataArray | None]: - per = _compute_precip_percentile_over_period(pr, per_interpolation, pr_per_thresh) - result = atmos.days_over_precip_thresh( - pr.study_da, - per, - thresh="1 mm/day", - **freq.build_frequency_kwargs(), - bootstrap=False, - ) - result = result.squeeze(PERCENTILES_COORD, drop=True) - if is_percent: - result = _to_percent(result, freq) - if save_percentile: - return result, per - return result, None - - -def _compute_temperature_percentile_index( - cf_var: CfVariable, - freq: Frequency, - tas_per_thresh: int, - per_window: int, - per_interpolation: QuantileInterpolation, - save_percentile: bool, - is_percent: bool, - callback: Callable, - xclim_index_fun: Callable, -) -> tuple[DataArray, DataArray | None]: - per, run_bootstrap = _compute_percentile_doy( - cf_var, - tas_per_thresh, - per_window, - per_interpolation, - callback, - ) - result = xclim_index_fun( - cf_var.study_da, - per, - **freq.build_frequency_kwargs(), - bootstrap=run_bootstrap, - ).squeeze(PERCENTILES_COORD, drop=True) - if run_bootstrap: - result = _add_bootstrap_meta(result, per) - if is_percent: - result = _to_percent(result, freq) - if save_percentile: - return result, per - return result, None diff --git a/icclim/ecad/ecad_indices.py b/icclim/ecad/ecad_indices.py 
index 2cbfe2e0..910e2a02 100644 --- a/icclim/ecad/ecad_indices.py +++ b/icclim/ecad/ecad_indices.py @@ -1,526 +1,637 @@ from __future__ import annotations -from enum import Enum +from icclim.generic_indices.cf_var_metadata import StandardVariableRegistry +from icclim.generic_indices.generic_indicators import GenericIndicatorRegistry +from icclim.models.constants import ECAD_ATBD, QUANTILE_BASED +from icclim.models.index_group import IndexGroupRegistry +from icclim.models.registry import Registry +from icclim.models.standard_index import StandardIndex +from icclim.models.threshold import Threshold -from icclim.clix_meta.clix_meta_indices import ClixMetaIndices -from icclim.ecad.ecad_functions import ( - cd, - cdd, - cfd, - csdi, - csu, - cw, - cwd, - dtr, - etr, - fd, - gd4, - hd17, - id, - prcptot, - r10mm, - r20mm, - r75p, - r75ptot, - r95p, - r95ptot, - r99p, - r99ptot, - rr1, - rx1day, - rx5day, - sd, - sd1, - sd5cm, - sd50cm, - sdii, - su, - tg, - tg10p, - tg90p, - tn, - tn10p, - tn90p, - tnn, - tnx, - tr, - tx, - tx10p, - tx90p, - txn, - txx, - vdtr, - wd, - wsdi, - ww, +ECAD_REFERENCE = ( + "ATBD of the ECA&D indices calculation" + " (https://knmi-ecad-assets-prd.s3.amazonaws.com/documents/atbd.pdf)" ) -from icclim.icclim_exceptions import InvalidIcclimArgumentError -from icclim.models.climate_index import ClimateIndex -from icclim.models.constants import ( - ECAD_ATBD, - MODIFIABLE_QUANTILE_WINDOW, - MODIFIABLE_THRESHOLD, - MODIFIABLE_UNIT, - PR, - QUANTILE_BASED, - TAS, - TAS_MAX, - TAS_MIN, -) -from icclim.models.index_group import IndexGroup - -clix_indices: ClixMetaIndices = ClixMetaIndices.get_instance() -def _get_clix_definition(short_name: str) -> str: - definition = "" - clix_index = clix_indices.lookup(short_name) - if clix_index is not None: - definition = clix_index["output"]["long_name"] - return definition +class EcadIndexRegistry(Registry): + _item_class = StandardIndex + # TODO Add indices of wind gust, wind direction, + # radiation, pressure, + # cloud cover, sunshine, + # humidity + @staticmethod + def get_item_aliases(item: StandardIndex) -> list[str]: + return [item.short_name] -class EcadIndex(Enum): - """ - ECA&D indices. - short_name: str - The index name used in the output. - compute: Callable - The function to compute the index. It wraps Xclim functions. - group: IndexGroup - The index group category. - variables: List[List[str]] - The Cf variables needed to compute the index. - The variable are individually described by a list of aliases. - qualifiers: List[str] - ``optional`` List of configuration to compute the index. - Used internally to generate modules for C3S. 
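For orientation, the enum-to-registry migration below follows a simple pattern: entries are plain class attributes and lookup matches a query against each item's aliases, case-insensitively. A toy sketch of the pattern (the real `icclim.models.registry.Registry` internals are not shown in this diff):

    from __future__ import annotations
    import dataclasses

    @dataclasses.dataclass
    class Item:
        short_name: str
        definition: str

    class MiniRegistry:
        TG = Item("TG", "Mean of daily mean temperature")
        TX = Item("TX", "Mean of daily maximum temperature")

        @classmethod
        def lookup(cls, query: str) -> Item:
            for item in vars(cls).values():
                if isinstance(item, Item) and item.short_name.upper() == query.upper():
                    return item
            raise KeyError(f"Unknown index {query}.")

    print(MiniRegistry.lookup("tg").definition)  # Mean of daily mean temperature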
- """ + @classmethod + def list(cls: EcadIndexRegistry) -> list[str]: + return [ + f"{i.group.name} | {i.short_name} | {i.definition}" for i in cls.values() + ] - # Temperature - TG = ClimateIndex( + TG = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.Average, + output_unit="degree_Celsius", + definition="Mean of daily mean temperature", + source=ECAD_ATBD, short_name="TG", - compute=lambda c: tg(c), - group=IndexGroup.TEMPERATURE, - input_variables=[TAS], - ) - TN = ClimateIndex( + group=IndexGroupRegistry.TEMPERATURE, + input_variables=[StandardVariableRegistry.TAS], + ) + TN = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.Average, + output_unit="degree_Celsius", + definition="Mean of daily minimum temperature", + source=ECAD_ATBD, short_name="TN", - compute=lambda c: tn(c), - group=IndexGroup.TEMPERATURE, - input_variables=[TAS_MIN], - ) - TX = ClimateIndex( + group=IndexGroupRegistry.TEMPERATURE, + input_variables=[StandardVariableRegistry.TAS_MIN], + ) + TX = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.Average, + output_unit="degree_Celsius", + definition="Mean of daily maximum temperature", + source=ECAD_ATBD, short_name="TX", - compute=lambda c: tx(c), - group=IndexGroup.TEMPERATURE, - input_variables=[TAS_MAX], - ) - DTR = ClimateIndex( + group=IndexGroupRegistry.TEMPERATURE, + input_variables=[StandardVariableRegistry.TAS_MAX], + ) + DTR = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.MeanOfDifference, + output_unit="degree_Celsius", + definition="Mean Diurnal Temperature Range", + source=ECAD_ATBD, short_name="DTR", - compute=lambda c: dtr(c), - group=IndexGroup.TEMPERATURE, - input_variables=[TAS_MAX, TAS_MIN], + group=IndexGroupRegistry.TEMPERATURE, + input_variables=[ + StandardVariableRegistry.TAS_MAX, + StandardVariableRegistry.TAS_MIN, + ], ) - ETR = ClimateIndex( + ETR = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.DifferenceOfExtremes, + output_unit="degree_Celsius", + definition="Intra-period extreme temperature range", + source=ECAD_ATBD, short_name="ETR", - compute=lambda c: etr(c), - group=IndexGroup.TEMPERATURE, - input_variables=[TAS_MAX, TAS_MIN], + group=IndexGroupRegistry.TEMPERATURE, + input_variables=[ + StandardVariableRegistry.TAS_MAX, + StandardVariableRegistry.TAS_MIN, + ], ) - VDTR = ClimateIndex( + VDTR = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.MeanOfAbsoluteOneTimeStepDifference, + output_unit="degree_Celsius", + definition="Mean day-to-day variation in Diurnal Temperature Range", + source=ECAD_ATBD, short_name="vDTR", - compute=lambda c: vdtr(c), - group=IndexGroup.TEMPERATURE, - input_variables=[TAS_MAX, TAS_MIN], + group=IndexGroupRegistry.TEMPERATURE, + input_variables=[ + StandardVariableRegistry.TAS_MAX, + StandardVariableRegistry.TAS_MIN, + ], ) # Heat - SU = ClimateIndex( + SU = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.CountOccurrences, + output_unit="day", + definition="Number of Summer Days (Tmax > 25C)", + source=ECAD_ATBD, short_name="SU", - compute=lambda c: su(c), - group=IndexGroup.HEAT, - input_variables=[TAS_MAX], - qualifiers=[MODIFIABLE_THRESHOLD], - ) - TR = ClimateIndex( + threshold="> 25 degree_Celsius", + group=IndexGroupRegistry.HEAT, + input_variables=[StandardVariableRegistry.TAS_MAX], + qualifiers=[], + ) + TR = StandardIndex( 
+ reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.CountOccurrences, + output_unit="day", + definition="Number of Tropical Nights (Tmin > 20C)", + source=ECAD_ATBD, short_name="TR", - compute=lambda c: tr(c), - group=IndexGroup.HEAT, - input_variables=[TAS_MIN], - qualifiers=[MODIFIABLE_THRESHOLD], - ) - WSDI = ClimateIndex( + threshold="> 20 degree_Celsius", + group=IndexGroupRegistry.HEAT, + input_variables=[StandardVariableRegistry.TAS_MIN], + qualifiers=[], + ) + WSDI = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.SumOfSpellLengths, + output_unit="day", + definition="Warm-spell duration index (days)", + source=ECAD_ATBD, short_name="WSDI", - compute=lambda c: wsdi(c), - group=IndexGroup.HEAT, - input_variables=[TAS_MAX], - qualifiers=[QUANTILE_BASED, MODIFIABLE_QUANTILE_WINDOW, MODIFIABLE_THRESHOLD], - ) - TG90P = ClimateIndex( + threshold="> 90 doy_per", + group=IndexGroupRegistry.HEAT, + input_variables=[StandardVariableRegistry.TAS_MAX], + qualifiers=[QUANTILE_BASED], + doy_window_width=5, + min_spell_length=6, + ) + TG90P = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.CountOccurrences, + output_unit="day", + definition="Days when Tmean > 90th percentile", + threshold="> 90 doy_per", + source=ECAD_ATBD, short_name="TG90p", - compute=lambda c: tg90p(c), - group=IndexGroup.HEAT, - input_variables=[TAS], - qualifiers=[ - QUANTILE_BASED, - MODIFIABLE_QUANTILE_WINDOW, - MODIFIABLE_UNIT, - MODIFIABLE_THRESHOLD, - ], - ) - TN90P = ClimateIndex( + group=IndexGroupRegistry.HEAT, + input_variables=[StandardVariableRegistry.TAS], + qualifiers=[QUANTILE_BASED], + doy_window_width=5, + ) + TN90P = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.CountOccurrences, + output_unit="day", + definition="Days when Tmin > 90th percentile", + threshold="> 90 doy_per", + source=ECAD_ATBD, short_name="TN90p", - compute=lambda c: tn90p(c), - group=IndexGroup.HEAT, - input_variables=[TAS_MIN], - qualifiers=[ - QUANTILE_BASED, - MODIFIABLE_QUANTILE_WINDOW, - MODIFIABLE_UNIT, - MODIFIABLE_THRESHOLD, - ], - ) - TX90P = ClimateIndex( + group=IndexGroupRegistry.HEAT, + input_variables=[StandardVariableRegistry.TAS_MIN], + qualifiers=[QUANTILE_BASED], + doy_window_width=5, + ) + TX90P = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.CountOccurrences, + threshold="> 90 doy_per", + output_unit="day", + definition="Days when Tmax > 90th daily percentile", + source=ECAD_ATBD, short_name="TX90p", - compute=lambda c: tx90p(c), - group=IndexGroup.HEAT, - input_variables=[TAS_MAX], - qualifiers=[ - QUANTILE_BASED, - MODIFIABLE_QUANTILE_WINDOW, - MODIFIABLE_UNIT, - MODIFIABLE_THRESHOLD, - ], - ) - TXX = ClimateIndex( + group=IndexGroupRegistry.HEAT, + input_variables=[StandardVariableRegistry.TAS_MAX], + qualifiers=[QUANTILE_BASED], + doy_window_width=5, + ) + TXX = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.Maximum, + output_unit="degree_Celsius", + definition="Maximum daily maximum temperature", + source=ECAD_ATBD, short_name="TXx", - compute=lambda c: txx(c), - group=IndexGroup.HEAT, - input_variables=[TAS_MAX], - ) - TNX = ClimateIndex( + group=IndexGroupRegistry.HEAT, + input_variables=[StandardVariableRegistry.TAS_MAX], + ) + TNX = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.Minimum, + output_unit="degree_Celsius", + definition="Maximum daily 
minimum temperature", + source=ECAD_ATBD, short_name="TNx", - compute=lambda c: tnx(c), - group=IndexGroup.HEAT, - input_variables=[TAS_MIN], - ) - CSU = ClimateIndex( + group=IndexGroupRegistry.HEAT, + input_variables=[StandardVariableRegistry.TAS_MIN], + ) + CSU = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.MaxConsecutiveOccurrence, + threshold="> 25 degree_Celsius", + output_unit="day", + definition="Maximum number of consecutive summer days (Tmax >25 C)", + source=ECAD_ATBD, short_name="CSU", - compute=lambda c: csu(c), - group=IndexGroup.HEAT, - input_variables=[TAS_MAX], - qualifiers=[MODIFIABLE_THRESHOLD], + group=IndexGroupRegistry.HEAT, + input_variables=[StandardVariableRegistry.TAS_MAX], + qualifiers=[], ) # Cold - GD4 = ClimateIndex( + GD4 = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.Excess, + threshold="4 degree_Celsius", + output_unit="degree_Celsius day", + definition="Growing degree days (sum of Tmean > 4 C)", + source=ECAD_ATBD, short_name="GD4", - compute=lambda c: gd4(c), - group=IndexGroup.COLD, - input_variables=[TAS], - qualifiers=[MODIFIABLE_THRESHOLD], - ) - FD = ClimateIndex( + group=IndexGroupRegistry.COLD, + input_variables=[StandardVariableRegistry.TAS], + qualifiers=[], + ) + FD = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.CountOccurrences, + threshold="< 0 degree_Celsius", + output_unit="day", + definition="Number of Frost Days (Tmin < 0C)", + source=ECAD_ATBD, short_name="FD", - compute=lambda c: fd(c), - group=IndexGroup.COLD, - input_variables=[TAS_MIN], - qualifiers=[MODIFIABLE_THRESHOLD], - ) - CFD = ClimateIndex( + group=IndexGroupRegistry.COLD, + input_variables=[StandardVariableRegistry.TAS_MIN], + qualifiers=[], + ) + CFD = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.MaxConsecutiveOccurrence, + threshold="< 0 degree_Celsius", + output_unit="day", + definition="Maximum number of consecutive frost days (Tmin < 0 C)", + source=ECAD_ATBD, short_name="CFD", - compute=lambda c: cfd(c), - group=IndexGroup.COLD, - input_variables=[TAS_MIN], - qualifiers=[MODIFIABLE_THRESHOLD], - ) - HD17 = ClimateIndex( + group=IndexGroupRegistry.COLD, + input_variables=[StandardVariableRegistry.TAS_MIN], + qualifiers=[], + ) + HD17 = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.Deficit, + threshold="17 degree_Celsius", + output_unit="degree_Celsius day", + definition="Heating degree days (sum of Tmean < 17 C)", + source=ECAD_ATBD, short_name="HD17", - compute=lambda c: hd17(c), - group=IndexGroup.COLD, - input_variables=[TAS], - qualifiers=[MODIFIABLE_THRESHOLD], - ) - ID = ClimateIndex( + group=IndexGroupRegistry.COLD, + input_variables=[StandardVariableRegistry.TAS], + qualifiers=[], + ) + ID = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.CountOccurrences, + threshold="< 0 degree_Celsius", + output_unit="day", + definition="Number of sharp Ice Days (Tmax < 0C)", + source=ECAD_ATBD, short_name="ID", - compute=lambda c: id(c), - group=IndexGroup.COLD, - input_variables=[TAS_MAX], - qualifiers=[MODIFIABLE_THRESHOLD], - ) - TG10P = ClimateIndex( + group=IndexGroupRegistry.COLD, + input_variables=[StandardVariableRegistry.TAS_MAX], + qualifiers=[], + ) + TG10P = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.CountOccurrences, + threshold="< 10 doy_per", + output_unit="day", + 
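The `threshold` strings introduced in these entries ("> 25 degree_Celsius", "< 10 doy_per", ...) bundle an operator, a value, and a unit token. A hypothetical mini-parser illustrating only the string format (icclim's actual `Threshold` class does much more, e.g. percentile resolution and unit conversion):

    import operator
    import re

    OPERATORS = {">": operator.gt, ">=": operator.ge,
                 "<": operator.lt, "<=": operator.le}

    def parse_threshold(query: str):
        match = re.match(r"\s*(>=|<=|>|<)\s*([\d.]+)\s*(.*)", query)
        if match is None:
            raise ValueError(f"Cannot parse threshold: {query!r}")
        op, value, unit = match.groups()
        # unit may be a pint unit ("degree_Celsius") or a percentile
        # marker ("doy_per", "period_per").
        return OPERATORS[op], float(value), unit

    print(parse_threshold("> 90 doy_per"))  # (<built-in function gt>, 90.0, 'doy_per')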
definition="Days when Tmean < 10th percentile", + source=ECAD_ATBD, short_name="TG10p", - compute=lambda c: tg10p(c), - group=IndexGroup.COLD, - input_variables=[TAS], - qualifiers=[ - QUANTILE_BASED, - MODIFIABLE_QUANTILE_WINDOW, - MODIFIABLE_UNIT, - MODIFIABLE_THRESHOLD, - ], - ) - TN10P = ClimateIndex( + group=IndexGroupRegistry.COLD, + input_variables=[StandardVariableRegistry.TAS], + qualifiers=[QUANTILE_BASED], + doy_window_width=5, + ) + TN10P = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.CountOccurrences, + threshold="< 10 doy_per", + output_unit="day", + definition="Days when Tmin < 10th percentile", + source=ECAD_ATBD, short_name="TN10p", - compute=lambda c: tn10p(c), - group=IndexGroup.COLD, - input_variables=[TAS_MIN], - qualifiers=[ - QUANTILE_BASED, - MODIFIABLE_QUANTILE_WINDOW, - MODIFIABLE_UNIT, - MODIFIABLE_THRESHOLD, - ], - ) - TX10P = ClimateIndex( + group=IndexGroupRegistry.COLD, + input_variables=[StandardVariableRegistry.TAS_MIN], + qualifiers=[QUANTILE_BASED], + doy_window_width=5, + ) + TX10P = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.CountOccurrences, + threshold="< 10 doy_per", + output_unit="day", + definition="Days when Tmax < 10th percentile", + source=ECAD_ATBD, short_name="TX10p", - compute=lambda c: tx10p(c), - group=IndexGroup.COLD, - input_variables=[TAS_MAX], - qualifiers=[ - QUANTILE_BASED, - MODIFIABLE_QUANTILE_WINDOW, - MODIFIABLE_UNIT, - MODIFIABLE_THRESHOLD, - ], - ) - TXN = ClimateIndex( + group=IndexGroupRegistry.COLD, + input_variables=[StandardVariableRegistry.TAS_MAX], + qualifiers=[QUANTILE_BASED], + doy_window_width=5, + ) + TXN = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.Minimum, + output_unit="degree_Celsius", + definition="Minimum daily maximum temperature", + source=ECAD_ATBD, short_name="TXn", - compute=lambda c: txn(c), - group=IndexGroup.COLD, - input_variables=[TAS_MAX], - ) - TNN = ClimateIndex( + group=IndexGroupRegistry.COLD, + input_variables=[StandardVariableRegistry.TAS_MAX], + ) + TNN = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.Minimum, + output_unit="degree_Celsius", + definition="Minimum daily minimum temperature", + source=ECAD_ATBD, short_name="TNn", - compute=lambda c: tnn(c), - group=IndexGroup.COLD, - input_variables=[TAS_MIN], - ) - CSDI = ClimateIndex( + group=IndexGroupRegistry.COLD, + input_variables=[StandardVariableRegistry.TAS_MIN], + ) + CSDI = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.SumOfSpellLengths, + threshold="< 10 doy_per", + output_unit="day", + definition="Cold-spell duration index (days)", + source=ECAD_ATBD, short_name="CSDI", - compute=lambda c: csdi(c), - group=IndexGroup.COLD, - input_variables=[TAS_MIN], - qualifiers=[QUANTILE_BASED, MODIFIABLE_QUANTILE_WINDOW, MODIFIABLE_THRESHOLD], + group=IndexGroupRegistry.COLD, + input_variables=[StandardVariableRegistry.TAS_MIN], + qualifiers=[QUANTILE_BASED], + doy_window_width=5, + min_spell_length=6, ) # Drought - CDD = ClimateIndex( + CDD = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.MaxConsecutiveOccurrence, + threshold="< 1 mm day-1", + output_unit="day", + definition="Maximum consecutive dry days (Precip < 1mm)", + source=ECAD_ATBD, short_name="CDD", - compute=lambda c: cdd(c), - group=IndexGroup.DROUGHT, - input_variables=[PR], + group=IndexGroupRegistry.DROUGHT, + 
input_variables=[StandardVariableRegistry.PR], ) # Rain - PRCPTOT = ClimateIndex( + PRCPTOT = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.Sum, + threshold=">= 1 mm day-1", + output_unit="mm", + definition="Total precipitation during Wet Days", + source=ECAD_ATBD, short_name="PRCPTOT", - compute=lambda c: prcptot(c), - group=IndexGroup.RAIN, - input_variables=[PR], - ) - RR1 = ClimateIndex( + group=IndexGroupRegistry.RAIN, + input_variables=[StandardVariableRegistry.PR], + ) + RR1 = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.CountOccurrences, + threshold=">= 1 mm day-1", + output_unit="day", + definition="Number of Wet Days (precip >= 1 mm)", + source=ECAD_ATBD, short_name="RR1", - compute=lambda c: rr1(c), - group=IndexGroup.RAIN, - input_variables=[PR], - ) - SDII = ClimateIndex( + group=IndexGroupRegistry.RAIN, + input_variables=[StandardVariableRegistry.PR], + ) + SDII = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.Average, + threshold=">= 1 mm day-1", + output_unit="mm day-1", + definition="Average precipitation during Wet Days (SDII)", + source=ECAD_ATBD, short_name="SDII", - compute=lambda c: sdii(c), - group=IndexGroup.RAIN, - input_variables=[PR], - ) - CWD = ClimateIndex( + group=IndexGroupRegistry.RAIN, + input_variables=[StandardVariableRegistry.PR], + ) + CWD = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.MaxConsecutiveOccurrence, + threshold=">= 1 mm day-1", + output_unit="day", + definition="Maximum consecutive wet days (Precip >= 1mm)", + source=ECAD_ATBD, short_name="CWD", - compute=lambda c: cwd(c), - group=IndexGroup.RAIN, - input_variables=[PR], - ) - R10MM = ClimateIndex( + group=IndexGroupRegistry.RAIN, + input_variables=[StandardVariableRegistry.PR], + ) + R10MM = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.CountOccurrences, + threshold=">= 10 mm day-1", + output_unit="day", + definition="Number of heavy precipitation days (Precip >=10mm)", + source=ECAD_ATBD, short_name="R10mm", - compute=lambda c: r10mm(c), - group=IndexGroup.RAIN, - input_variables=[PR], - ) - R20MM = ClimateIndex( + group=IndexGroupRegistry.RAIN, + input_variables=[StandardVariableRegistry.PR], + ) + R20MM = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.CountOccurrences, + threshold=">= 20 mm day-1", + output_unit="day", + definition="Number of very heavy precipitation days (Precip >= 20mm)", + source=ECAD_ATBD, short_name="R20mm", - compute=lambda c: r20mm(c), - group=IndexGroup.RAIN, - input_variables=[PR], - ) - RX1DAY = ClimateIndex( + group=IndexGroupRegistry.RAIN, + input_variables=[StandardVariableRegistry.PR], + ) + RX1DAY = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.Maximum, + output_unit="mm day-1", + definition="maximum 1-day total precipitation", # from xclim + source=ECAD_ATBD, short_name="RX1day", - compute=lambda c: rx1day(c), - group=IndexGroup.RAIN, - input_variables=[PR], - ) - RX5DAY = ClimateIndex( + group=IndexGroupRegistry.RAIN, + input_variables=[StandardVariableRegistry.PR], + ) + RX5DAY = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.MaxOfRollingSum, + output_unit="mm", + definition="maximum 5-day total precipitation", # from xclim + source=ECAD_ATBD, short_name="RX5day", - compute=lambda c: rx5day(c), - group=IndexGroup.RAIN, - 
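As a reference point, the count-occurrence precipitation indices in this block reduce to a one-liner in plain xarray, assuming daily precipitation already expressed in mm/day (synthetic data below, illustrative only):

    import numpy as np
    import pandas as pd
    import xarray as xr

    time = pd.date_range("2000-01-01", "2001-12-31", freq="D")
    pr = xr.DataArray(np.random.gamma(0.8, 4.0, len(time)),
                      dims="time", coords={"time": time})

    # R10mm: number of heavy precipitation days (>= 10 mm/day) per year.
    r10mm = (pr >= 10).resample(time="YS").sum()
    print(r10mm.values)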
input_variables=[PR], - ) - R75P = ClimateIndex( + group=IndexGroupRegistry.RAIN, + input_variables=[StandardVariableRegistry.PR], + qualifiers=[], + rolling_window_width=5, + ) + R75P = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.CountOccurrences, + threshold=Threshold("> 75 period_per", threshold_min_value="1 mm/day"), + output_unit="day", + definition="Days with RR > 75th percentile of daily amounts (moderate wet days)" + " (d)", + source=ECAD_ATBD, short_name="R75p", - compute=lambda c: r75p(c), - group=IndexGroup.RAIN, - input_variables=[PR], - qualifiers=[QUANTILE_BASED, MODIFIABLE_UNIT], + group=IndexGroupRegistry.RAIN, + input_variables=[StandardVariableRegistry.PR], + qualifiers=[QUANTILE_BASED], ) - R75PTOT = ClimateIndex( + R75PTOT = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.FractionOfTotal, + threshold=Threshold("> 75 period_per", threshold_min_value="1 mm/day"), + output_unit="%", + definition="Precipitation fraction due to moderate wet days" + " (> 75th percentile)", + source=ECAD_ATBD, short_name="R75pTOT", - compute=lambda c: r75ptot(c), - group=IndexGroup.RAIN, - input_variables=[PR], + group=IndexGroupRegistry.RAIN, + input_variables=[StandardVariableRegistry.PR], qualifiers=[QUANTILE_BASED], ) - R95P = ClimateIndex( + R95P = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.CountOccurrences, + threshold=Threshold("> 95 period_per", threshold_min_value="1 mm/day"), + output_unit="day", + definition="Days with RR > 95th percentile of daily amounts (very wet days)" + " (days)", + source=ECAD_ATBD, short_name="R95p", - compute=lambda c: r95p(c), - group=IndexGroup.RAIN, - input_variables=[PR], - qualifiers=[QUANTILE_BASED, MODIFIABLE_UNIT], + group=IndexGroupRegistry.RAIN, + input_variables=[StandardVariableRegistry.PR], + qualifiers=[QUANTILE_BASED], ) - R95PTOT = ClimateIndex( + R95PTOT = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.FractionOfTotal, + threshold=Threshold("> 95 period_per", threshold_min_value="1 mm/day"), + output_unit="%", + definition="Precipitation fraction due to very wet days (> 95th percentile)", + source=ECAD_ATBD, short_name="R95pTOT", - compute=lambda c: r95ptot(c), - group=IndexGroup.RAIN, - input_variables=[PR], + group=IndexGroupRegistry.RAIN, + input_variables=[StandardVariableRegistry.PR], qualifiers=[QUANTILE_BASED], ) - R99P = ClimateIndex( + R99P = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.CountOccurrences, + threshold=Threshold("> 99 period_per", threshold_min_value="1 mm/day"), + output_unit="day", + definition="Days with RR > 99th percentile of daily amounts" + " (extremely wet days)", + source=ECAD_ATBD, short_name="R99p", - compute=lambda c: r99p(c), - group=IndexGroup.RAIN, - input_variables=[PR], - qualifiers=[QUANTILE_BASED, MODIFIABLE_UNIT], + group=IndexGroupRegistry.RAIN, + input_variables=[StandardVariableRegistry.PR], + qualifiers=[QUANTILE_BASED], ) - R99PTOT = ClimateIndex( + R99PTOT = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.FractionOfTotal, + threshold=Threshold("> 99 period_per", threshold_min_value="1 mm/day"), + output_unit="%", + definition="Precipitation fraction due to extremely wet days" + " (> 99th percentile)", + source=ECAD_ATBD, short_name="R99pTOT", - compute=lambda c: r99ptot(c), - group=IndexGroup.RAIN, - input_variables=[PR], + 
group=IndexGroupRegistry.RAIN, + input_variables=[StandardVariableRegistry.PR], qualifiers=[QUANTILE_BASED], ) # Snow - SD = ClimateIndex( + SD = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.Average, + output_unit="cm", + definition="Mean of daily snow depth", + source=ECAD_ATBD, short_name="SD", - compute=lambda c: sd(c), - group=IndexGroup.SNOW, - input_variables=[PR], - ) - SD1 = ClimateIndex( + group=IndexGroupRegistry.SNOW, + input_variables=[StandardVariableRegistry.PR], + ) + SD1 = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.CountOccurrences, + threshold=">= 1 cm", + output_unit="day", + definition="Snow days (SD >= 1 cm)", + source=ECAD_ATBD, short_name="SD1", - compute=lambda c: sd1(c), - group=IndexGroup.SNOW, - input_variables=[PR], - ) - SD5CM = ClimateIndex( + group=IndexGroupRegistry.SNOW, + input_variables=[StandardVariableRegistry.PR], + ) + SD5CM = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.CountOccurrences, + output_unit="day", + threshold=">= 5 cm", + definition="Number of days with snow depth >= 5 cm", + source=ECAD_ATBD, short_name="SD5cm", - compute=lambda c: sd5cm(c), - group=IndexGroup.SNOW, - input_variables=[PR], - ) - SD50CM = ClimateIndex( + group=IndexGroupRegistry.SNOW, + input_variables=[StandardVariableRegistry.PR], + ) + SD50CM = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.CountOccurrences, + threshold=">= 50 cm", + output_unit="day", + definition="Number of days with snow depth >= 50 cm", + source=ECAD_ATBD, short_name="SD50cm", - compute=lambda c: sd50cm(c), - group=IndexGroup.SNOW, - input_variables=[PR], + group=IndexGroupRegistry.SNOW, + input_variables=[StandardVariableRegistry.PR], ) # Compound (precipitation and temperature) - CD = ClimateIndex( + CD = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.CountOccurrences, + threshold=[ + "< 25 doy_per", + Threshold("< 25 period_per", threshold_min_value="1 mm/day"), + ], + output_unit="day", + definition="Days with TG < 25th percentile of daily mean temperature and" + " RR <25th percentile of daily precipitation sum (cold/dry days)", + source=ECAD_ATBD, short_name="CD", - compute=lambda c: cd(c), - group=IndexGroup.COMPOUND, - input_variables=[TAS, PR], - qualifiers=[QUANTILE_BASED, MODIFIABLE_QUANTILE_WINDOW], - ) - CW = ClimateIndex( + group=IndexGroupRegistry.COMPOUND, + input_variables=[StandardVariableRegistry.TAS, StandardVariableRegistry.PR], + qualifiers=[QUANTILE_BASED], + doy_window_width=5, + ) + CW = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.CountOccurrences, + threshold=[ + "< 25 doy_per", + Threshold("> 75 period_per", threshold_min_value="1 mm/day"), + ], + output_unit="day", + definition="Days with TG < 25th percentile of daily mean temperature and" + " RR >75th percentile of daily precipitation sum (cold/wet days)", + source=ECAD_ATBD, short_name="CW", - compute=lambda c: cw(c), - group=IndexGroup.COMPOUND, - input_variables=[TAS, PR], - qualifiers=[QUANTILE_BASED, MODIFIABLE_QUANTILE_WINDOW], - ) - WD = ClimateIndex( + group=IndexGroupRegistry.COMPOUND, + input_variables=[StandardVariableRegistry.TAS, StandardVariableRegistry.PR], + qualifiers=[QUANTILE_BASED], + doy_window_width=5, + ) + WD = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.CountOccurrences, + threshold=[ + "> 75 doy_per", 
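The `FractionOfTotal` entries (R75pTOT, R95pTOT, R99pTOT) compute the share of total precipitation contributed by days above a wet-day percentile. A simplified sketch, taking the percentile over the whole series for brevity where icclim uses a reference period:

    import numpy as np
    import pandas as pd
    import xarray as xr

    time = pd.date_range("1991-01-01", "2000-12-31", freq="D")
    pr = xr.DataArray(np.random.gamma(0.8, 4.0, len(time)),
                      dims="time", coords={"time": time})

    wet = pr.where(pr >= 1)                   # wet days only (>= 1 mm/day)
    p95 = wet.quantile(0.95, dim="time")      # percentile of wet-day amounts
    over = pr.where(pr > p95, 0).resample(time="YS").sum()
    total = pr.resample(time="YS").sum()
    r95ptot = 100 * over / total              # precipitation fraction, in %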
+ Threshold("< 25 period_per", threshold_min_value="1 mm/day"), + ], + output_unit="day", + definition="Days with TG > 75th percentile of daily mean temperature and" + " RR <25th percentile of daily precipitation sum (warm/dry days)", + source=ECAD_ATBD, short_name="WD", - compute=lambda c: wd(c), - group=IndexGroup.COMPOUND, - input_variables=[TAS, PR], - qualifiers=[QUANTILE_BASED, MODIFIABLE_QUANTILE_WINDOW], - ) - WW = ClimateIndex( + group=IndexGroupRegistry.COMPOUND, + input_variables=[StandardVariableRegistry.TAS, StandardVariableRegistry.PR], + qualifiers=[QUANTILE_BASED], + doy_window_width=5, + ) + WW = StandardIndex( + reference=ECAD_REFERENCE, + generic_indicator=GenericIndicatorRegistry.CountOccurrences, + threshold=[ + "> 75 doy_per", + Threshold("> 75 period_per", threshold_min_value="1 mm/day"), + ], + output_unit="day", + definition="Days with TG > 75th percentile of daily mean temperature and" + " RR >75th percentile of daily precipitation sum (warm/wet days)", + source=ECAD_ATBD, short_name="WW", - compute=lambda c: ww(c), - group=IndexGroup.COMPOUND, - input_variables=[TAS, PR], - qualifiers=[QUANTILE_BASED, MODIFIABLE_QUANTILE_WINDOW], + group=IndexGroupRegistry.COMPOUND, + input_variables=[StandardVariableRegistry.TAS, StandardVariableRegistry.PR], + qualifiers=[QUANTILE_BASED], + doy_window_width=5, ) - - def __init__( - self, - climate_index: ClimateIndex, - ): - self.climate_index = climate_index - self.climate_index.definition = _get_clix_definition(climate_index.short_name) - self.climate_index.source = ECAD_ATBD - - @staticmethod - def lookup(query: str) -> ClimateIndex: - if isinstance(query, EcadIndex): - return query.value - for e in EcadIndex: - if e.short_name.upper() == query.upper(): - return e - raise InvalidIcclimArgumentError(f"Unknown ECA&D index {query}.") - - @property - def group(self): - return self.climate_index.group - - @property - def short_name(self): - return self.climate_index.short_name - - @property - def definition(self): - return self.climate_index.definition - - @property - def compute(self): - return self.climate_index.compute - - @property - def input_variables(self): - return self.climate_index.input_variables - - @property - def qualifiers(self): - return self.climate_index.qualifiers - - @property - def source(self): - return self.climate_index.source - - @staticmethod - def list() -> list[str]: - """ - Get a a string list of ``EcadIndex`` enum's indices formatted in a readable - fashion. 
- """ - return [f"{i.group.value} | {i.short_name} | {i.definition}" for i in EcadIndex] - - -def get_season_excluded_indices() -> list[EcadIndex]: - """List of indices which cannot be computed with seasonal slice_mode.""" - return [ - EcadIndex.WSDI, - EcadIndex.CSU, - EcadIndex.CFD, - EcadIndex.CSDI, - EcadIndex.CDD, - EcadIndex.CWD, - EcadIndex.RX5DAY, - EcadIndex.CD, - EcadIndex.CW, - EcadIndex.WD, - EcadIndex.WW, - ] diff --git a/icclim/clix_meta/__init__.py b/icclim/generic_indices/__init__.py similarity index 100% rename from icclim/clix_meta/__init__.py rename to icclim/generic_indices/__init__.py diff --git a/icclim/generic_indices/cf_var_metadata.py b/icclim/generic_indices/cf_var_metadata.py new file mode 100644 index 00000000..6e61b360 --- /dev/null +++ b/icclim/generic_indices/cf_var_metadata.py @@ -0,0 +1,246 @@ +from __future__ import annotations + +import dataclasses +from typing import Hashable, TypedDict + +from icclim.models.constants import PART_OF_A_WHOLE_UNIT +from icclim.models.registry import Registry + + +class IndicatorMetadata(TypedDict): + identifier: str + standard_name: str + long_name: str + cell_methods: str + + +@dataclasses.dataclass +class StandardVariable(Hashable): + short_name: str + standard_name: str + long_name: str + aliases: list[str] + default_units: str + + def __hash__(self) -> int: + return hash(self.short_name + self.standard_name) + + def get_metadata(self): + return dict( + standard_name=self.standard_name, + long_name=self.long_name, + short_name=self.short_name, + ) + + +class StandardVariableRegistry(Registry): + _item_class = StandardVariable + PR = StandardVariable( + short_name="pr", + standard_name="precipitation_flux", + long_name="precipitation", + aliases=[ + "pr", + "prAdjust", + "prec", + "rr", + "precip", + "RR", + "PRECIP", + "Precip", + ], + default_units="mm", + ) + TAS = StandardVariable( + short_name="tg", + standard_name="average_air_temperature", + long_name="average air temperature", + aliases=[ + "tas", + "tavg", + "ta", + "tasAdjust", + "tmean", + "tm", + "tg", + "meant", + "TMEAN", + "Tmean", + "TM", + "TG", + "MEANT", + "meanT", + "tasmidpoint", + ], + default_units="degC", + ) + TAS_MIN = StandardVariable( + short_name="tn", + standard_name="minimum_air_temperature", # not CF nor CMIP (air_temperature) + long_name="minimum air temperature", + aliases=[ + "tasmin", + "tasminAdjust", + "tmin", + "tn", + "mint", + "TMIN", + "Tmin", + "TN", + "MINT", + "minT", + ], + default_units="degC", + ) + TAS_MAX = StandardVariable( + short_name="tx", + standard_name="maximum_air_temperature", # not CF nor CMIP (air_temperature) + long_name="maximum air temperature", + aliases=[ + "tasmax", + "tasmaxAdjust", + "tmax", + "tx", + "maxt", + "TMAX", + "Tmax", + "TX", + "MAXT", + "maxT", + ], + default_units="degC", + ) + HURS = StandardVariable( + short_name="hurs", + standard_name="relative_humidity", + long_name="relative humidity", + aliases=["hurs", "hursAdjust", "rh", "RH"], + default_units="1", # % + ) + PS = StandardVariable( + short_name="ps", + standard_name="surface_air_pressure ", + long_name="Surface Air Pressure", + aliases=["ps"], + default_units="Pa", + ) + PSL = StandardVariable( + short_name="psl", + standard_name="air_pressure_at_sea_level ", + long_name="Sea Level Pressure", + aliases=["psl", "mslp", "slp", "pp", "MSLP", "SLP", "PP"], + default_units="Pa", + ) + SND = StandardVariable( + short_name="snd", + standard_name="surface_snow_thickness", + long_name="snow thickness", + aliases=["snd", "sd", "SD"], + 
default_units="m", + ) + SUND = StandardVariable( + short_name="sund", + standard_name="duration_of_sunshine", + long_name="duration of sunshine", + aliases=["sund", "ss", "SS"], + default_units="s", + ) + WSGS_MAX = StandardVariable( + short_name="wsgs_max", + standard_name="wind_speed_of_gust", + long_name="wind speed of gust", + aliases=["wsgsmax", "fx", "FX"], + default_units="m/s", + ) + SFC_WIND = StandardVariable( + short_name="sfcWind", + standard_name="wind_speed", + long_name="Near-Surface Wind Speed", + aliases=["sfcWind", "sfcwind", "fg", "FG"], + default_units="m/s", + ) + SNW = StandardVariable( + short_name="snw", + standard_name="surface_snow_amount", + long_name="surface snow amount", + aliases=["snw", "swe", "SW"], + default_units="kg m-2", + ) + EVSPSBL = StandardVariable( + short_name="evspsbl", + standard_name="water_evapotranspiration_flux", + long_name="Evaporation Including Sublimation and Transpiration", + aliases=["evspsbl", "water_evaporation_flux"], + default_units="kg m-2 s-1", + ) + HUSS = StandardVariable( + short_name="huss", + standard_name="specific_humidity", + long_name="Near-Surface Specific Humidity", + aliases=["huss"], + default_units=PART_OF_A_WHOLE_UNIT, + ) + UAS = StandardVariable( + short_name="uas", + standard_name="eastward_wind", + long_name="Eastward Near-Surface Wind", + aliases=["uas"], + default_units="m s-1", + ) + VAS = StandardVariable( + short_name="vas", + standard_name="northward_wind", + long_name="Northward Near-Surface Wind", + aliases=["vas"], + default_units="m s-1", + ) + CLT = StandardVariable( + short_name="clt", + standard_name="cloud_area_fraction", + long_name="Total Cloud Cover Percentage", + aliases=["clt"], + default_units="m s-1", + ) + RSDS = StandardVariable( + short_name="rsds", + standard_name="surface_downwelling_shortwave_flux_in_air", + long_name="Surface Downwelling Shortwave Radiation", + aliases=["rsds", "surface_downwelling_shortwave_flux"], + default_units="W m-2", + ) + RLDS = StandardVariable( + short_name="rlds", + standard_name="surface_downwelling_longwave_flux_in_air", + long_name="Surface Downwelling Longwave Radiation", + aliases=["rlds", "surface_downwelling_longwave_flux"], + default_units="W m-2", + ) + OROG = StandardVariable( + short_name="orog", + standard_name="surface_altitude", + long_name="Surface Altitude", + aliases=["orog"], + default_units="m", + ) + SFTLF = StandardVariable( + short_name="sftlf", + standard_name="land_area_fraction", + long_name="Percentage of the Grid Cell Occupied by Land", + aliases=["sftlf"], + default_units="%", + ) + # X = StandardVariable( + # short_name="x", + # standard_name="y", + # long_name="z", + # aliases=["x"], + # default_units="w", + # ) + # todo add tier1 and tier2 aliases from cmip6/cordex https://docs.google.com/spreadsheets/d/1qUauozwXkq7r1g-L4ALMIkCNINIhhCPx/edit?rtpof=true&sd=true#gid=1672965248 # noqa + + @staticmethod + def get_item_aliases(item: StandardVariable) -> list[str]: + aliases = list(map(str.upper, item.aliases)) + aliases.append(item.standard_name.upper()) + aliases.append(item.long_name.upper()) + return aliases diff --git a/icclim/generic_indices/generic_indicators.py b/icclim/generic_indices/generic_indicators.py new file mode 100644 index 00000000..3f22e873 --- /dev/null +++ b/icclim/generic_indices/generic_indicators.py @@ -0,0 +1,1157 @@ +from __future__ import annotations + +import abc +from abc import ABC +from datetime import timedelta +from functools import partial, reduce +from typing import Any, Callable +from warnings 
import warn + +import numpy +import numpy as np +import xarray as xr +from jinja2 import Environment +from xarray import DataArray +from xarray.core.resample import DataArrayResample +from xarray.core.rolling import DataArrayRolling +from xclim.core.bootstrapping import percentile_bootstrap +from xclim.core.calendar import build_climatology_bounds, resample_doy, select_time +from xclim.core.cfchecks import cfcheck_from_name +from xclim.core.datachecks import check_freq +from xclim.core.missing import MissingBase +from xclim.core.options import MISSING_METHODS, MISSING_OPTIONS, OPTIONS +from xclim.core.units import convert_units_to, rate2amount, to_agg_units +from xclim.core.utils import PercentileDataArray +from xclim.indices import run_length + +from icclim.generic_indices.generic_templates import INDICATORS_TEMPLATES_EN +from icclim.icclim_exceptions import InvalidIcclimArgumentError +from icclim.models.cf_calendar import CfCalendarRegistry +from icclim.models.climate_variable import ClimateVariable +from icclim.models.constants import ( + GROUP_BY_METHOD, + GROUP_BY_REF_AND_RESAMPLE_STUDY_METHOD, + PART_OF_A_WHOLE_UNIT, + REFERENCE_PERIOD_ID, + RESAMPLE_METHOD, + UNITS_ATTRIBUTE_KEY, +) +from icclim.models.frequency import RUN_INDEXER, Frequency, FrequencyRegistry +from icclim.models.index_config import IndexConfig +from icclim.models.logical_link import LogicalLink +from icclim.models.operator import Operator +from icclim.models.registry import Registry +from icclim.models.threshold import Threshold + +jinja_env = Environment() + + +class MissingMethodLike(metaclass=abc.ABCMeta): + """workaround xclim missing type""" + + def execute(self, *args, **kwargs) -> MissingBase: + ... + + def validate(self, *args, **kwargs) -> bool: + ... + + +class Indicator(metaclass=abc.ABCMeta): + standard_name: str + long_name: str + cell_methods: str + + templated_properties = [ + "standard_name", + "long_name", + "cell_methods", + ] + + @abc.abstractmethod + def __call__(self, *args, **kwargs) -> DataArray: + ... + + @abc.abstractmethod + def preprocess(self, *args, **kwargs) -> list[DataArray]: + ... + + @abc.abstractmethod + def postprocess(self, *args, **kwargs) -> DataArray: + ... + + +class ResamplingIndicator(Indicator, ABC): + """Abstract class for indicators. + It implements some preprocessing common logic: + * + + """ + + missing: str + missing_options: dict | None + + def __init__(self, missing="from_context", missing_options=None): + self.missing_options = missing_options + self.missing = missing + if self.missing == "from_context" and self.missing_options is not None: + raise ValueError( + "Cannot set `missing_options` with `missing` method being from context." 
+ ) + missing_method: MissingMethodLike = MISSING_METHODS[self.missing] # noqa typing + self._missing = missing_method.execute + if self.missing_options: + missing_method.validate(**self.missing_options) + super().__init__() + + def preprocess( + self, + climate_vars: list[ClimateVariable], + jinja_scope: dict[str, Any], + src_freq: Frequency, + ) -> list[ClimateVariable]: + _check_data(climate_vars, src_freq.pandas_freq) + _check_cf(climate_vars) + self.format(jinja_scope=jinja_scope) + return climate_vars + + def postprocess( + self, + result: DataArray, + climate_vars: list[ClimateVariable], + output_freq: str, + src_freq: str, + indexer: dict, + out_unit: str | None, + ): + if out_unit is not None: + result = convert_units_to(result, out_unit) + if self.missing != "skip" and indexer is not None: + # reference variable is a subset of the studied variable, + # so no need to check it. + das = filter(lambda cv: not cv.is_reference, climate_vars) + das = map(lambda cv: cv.studied_data, das) + das = list(das) + if "time" in result.dims: + result = self._handle_missing_values( + resample_freq=output_freq, + src_freq=src_freq, + indexer=indexer, + in_data=das, + out_data=result, + ) + for prop in self.templated_properties: + result.attrs[prop] = getattr(self, prop) + result.attrs["history"] = "" + return result + + def format(self, jinja_scope: dict): + for templated_property in self.templated_properties: + template = jinja_env.from_string( + getattr(self, templated_property), + globals=jinja_scope, + ) + setattr(self, templated_property, template.render()) + + def _handle_missing_values( + self, + in_data: list[DataArray], + resample_freq: str, + src_freq: str, + indexer: dict | None, + out_data: DataArray, + ) -> DataArray: + options = self.missing_options or OPTIONS[MISSING_OPTIONS].get(self.missing, {}) + # We flag periods according to the missing method. skip variables without a time + # coordinate. + missing_method: MissingMethodLike = MISSING_METHODS[self.missing] # noqa typing + miss = ( + missing_method.execute(da, resample_freq, src_freq, options, indexer) + for da in in_data + if "time" in da.coords + ) + # Reduce by or and broadcast to ensure the same length in time + # When indexing is used and there are no valid points in the last period, + # mask will not include it + mask = reduce(np.logical_or, miss) # noqa typing + if isinstance(mask, DataArray) and mask.time.size < out_data.time.size: + mask = mask.reindex(time=out_data.time, fill_value=True) + return out_data.where(~mask) + + +def _check_cf(climate_vars: list[ClimateVariable]): + """Compare metadata attributes to CF-Convention standards. + + Default cfchecks use the specifications in `xclim.core.utils.VARIABLES`, + assuming the indicator's inputs are using the CMIP6/xclim variable names + correctly. + Variables absent from these default specs are silently ignored. + + When subclassing this method, use functions decorated using + `xclim.core.options.cfcheck`. + """ + for da in climate_vars: + try: + cfcheck_from_name(str(da.name), da) + except KeyError: + # Silently ignore unknown variables. 
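The `format` method above renders the templated properties with jinja2. A minimal standalone illustration (the template text and scope keys here are invented; the real scope is assembled in `GenericIndicator.__call__`):

    from jinja2 import Environment

    jinja_env = Environment()
    template = jinja_env.from_string(
        "Number of {{source_freq}} periods when {{climate_var}} exceeds the threshold"
    )
    print(template.render(source_freq="daily", climate_var="tasmax"))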
+ pass + + +def _check_data(climate_vars: list[ClimateVariable], src_freq: str): + if src_freq is None: + return + for climate_var in climate_vars: + da = climate_var.studied_data + if "time" in da.coords and da.time.ndim == 1 and len(da.time) > 3: + check_freq(da, src_freq, strict=True) + + +class GenericIndicator(ResamplingIndicator): + name: str + + def __init__( + self, + name: str, + process: Callable[..., DataArray], + select_time_before_computation: bool = True, + check_vars: ( + Callable[[list[ClimateVariable], GenericIndicator], None] | None + ) = None, + sampling_methods: list[str] = None, + **kwargs, + ): + super().__init__(**kwargs) + local = INDICATORS_TEMPLATES_EN + self.name = name + self.process = process + self.select_time_before_computation = select_time_before_computation + self.standard_name = local[name]["standard_name"] + self.cell_methods = local[name]["cell_methods"] + self.long_name = local[name]["long_name"] + self.check_vars = check_vars + self.sampling_methods = ( + sampling_methods if sampling_methods is not None else [RESAMPLE_METHOD] + ) + + def preprocess( # noqa signature != from super + self, + climate_vars: list[ClimateVariable], + jinja_scope: dict[str, Any], + output_frequency: Frequency, + src_freq: Frequency, + output_unit: str | None, + coef: float | None, + sampling_method: str, + is_compared_to_reference: bool, + ) -> list[ClimateVariable]: + if not _same_freq_for_all(climate_vars): + raise InvalidIcclimArgumentError( + "All variables must have the same time frequency (for example daily) to" + " be compared with each others, but this was not the case." + ) + if self.check_vars is not None: + self.check_vars(climate_vars, self) + if sampling_method not in self.sampling_methods: + raise InvalidIcclimArgumentError( + f"{self.name} can only be computed with the following" + f" sampling_method(s): {self.sampling_methods}" + ) + if is_compared_to_reference and sampling_method == RESAMPLE_METHOD: + raise InvalidIcclimArgumentError( + "It does not make sense to resample the reference variable if it is" + " already a subsample of the studied variable. Try setting" + f" `sampling_method='{GROUP_BY_REF_AND_RESAMPLE_STUDY_METHOD}'`" + f" instead." 
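`_handle_missing_values` above reduces per-variable missing masks with `np.logical_or` and masks the aggregated output. For a single variable, the same effect can be had with xclim's public helper, shown here on synthetic data:

    import numpy as np
    import pandas as pd
    import xarray as xr
    from xclim.core.missing import missing_any

    time = pd.date_range("2000-01-01", "2001-12-31", freq="D")
    tas = xr.DataArray(np.full(len(time), 20.0), dims="time",
                       coords={"time": time}, attrs={"units": "degC"})
    tas[10:20] = np.nan                      # simulate a gap in year 2000

    yearly = tas.resample(time="YS").mean()
    mask = missing_any(tas, freq="YS")       # True for 2000, False for 2001
    print(yearly.where(~mask).values)        # [nan, 20.]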
+ ) + if output_unit is not None and _is_amount_unit(output_unit): + for climate_var in climate_vars: + current_unit = climate_var.studied_data.attrs.get( + UNITS_ATTRIBUTE_KEY, None + ) + if current_unit is not None and not _is_amount_unit(current_unit): + climate_var.studied_data = rate2amount( + climate_var.studied_data, out_units=output_unit + ) + if coef is not None: + for climate_var in climate_vars: + climate_var.studied_data = coef * climate_var.studied_data + if output_frequency.indexer and self.select_time_before_computation: + for climate_var in climate_vars: + climate_var.studied_data = select_time( + climate_var.studied_data, **output_frequency.indexer, drop=True + ) + return super().preprocess( + climate_vars=climate_vars, + jinja_scope=jinja_scope, + src_freq=src_freq, + ) + + def __call__(self, config: IndexConfig) -> DataArray: + # icclim wrapper + src_freq = config.climate_variables[0].source_frequency + jinja_scope = { + "output_freq": config.frequency, + "source_freq": src_freq, + "min_spell_length": config.min_spell_length, + "rolling_window_width": config.rolling_window_width, + "np": numpy, + "enumerate": enumerate, + "len": len, + "climate_vars": _get_inputs_metadata( + config.climate_variables, src_freq, config.indicator_name + ), + "is_compared_to_reference": config.is_compared_to_reference, + "reference_period": config.reference_period, + } + climate_vars = self.preprocess( + climate_vars=config.climate_variables, + jinja_scope=jinja_scope, + output_frequency=config.frequency, + src_freq=src_freq, + output_unit=config.out_unit, + coef=config.coef, + sampling_method=config.sampling_method, + is_compared_to_reference=config.is_compared_to_reference, + ) + result = self.process( + climate_vars=climate_vars, + resample_freq=config.frequency, + min_spell_length=config.min_spell_length, + rolling_window_width=config.rolling_window_width, + group_by_freq=config.frequency.group_by_key, + is_compared_to_reference=config.is_compared_to_reference, + logical_link=config.logical_link, + date_event=config.date_event, + source_freq_delta=src_freq.delta, + to_percent=config.out_unit == "%", + sampling_method=config.sampling_method, + ) + return self.postprocess( + result, + climate_vars=climate_vars, + output_freq=config.frequency.pandas_freq, + src_freq=src_freq.pandas_freq, + indexer=config.frequency.indexer, + out_unit=config.out_unit, + ) + + +def count_occurrences( + climate_vars: list[ClimateVariable], + resample_freq: Frequency, + logical_link: LogicalLink, + date_event: bool, + to_percent: bool, + **kwargs, # noqa +) -> DataArray: + if date_event: + reducer_op = _count_occurrences_with_date + else: + reducer_op = partial(DataArray.sum, dim="time") + merged_exceedances = _compute_exceedances( + climate_vars, resample_freq.pandas_freq, logical_link + ) + result = reducer_op(merged_exceedances.resample(time=resample_freq.pandas_freq)) + if to_percent: + result = _to_percent(result, resample_freq) + result.attrs[UNITS_ATTRIBUTE_KEY] = "%" + return result + else: + return to_agg_units(result, climate_vars[0].studied_data, "count") + + +def max_consecutive_occurrence( + climate_vars: list[ClimateVariable], + resample_freq: Frequency, + logical_link: LogicalLink, + date_event: bool, + source_freq_delta: timedelta, + **kwargs, # noqa +) -> DataArray: + merged_exceedances = _compute_exceedances( + climate_vars, resample_freq.pandas_freq, logical_link + ) + # todo wait for xclim#1134 to benefit from the run_length algo update + rle = run_length.rle(merged_exceedances, dim="time", 
index="first") + if resample_freq.indexer: + rle = select_time(rle, **resample_freq.indexer) + resampled = rle.resample(time=resample_freq.pandas_freq) + if date_event: + result = _consecutive_occurrences_with_dates(resampled, source_freq_delta) + else: + result = resampled.max(dim="time") + return to_agg_units(result, climate_vars[0].studied_data, "count") + + +def sum_of_spell_lengths( + climate_vars: list[ClimateVariable], + resample_freq: Frequency, + logical_link: LogicalLink, + min_spell_length: int, + **kwargs, # noqa +) -> DataArray: + merged_exceedances = _compute_exceedances( + climate_vars, resample_freq.pandas_freq, logical_link + ) + # todo wait for xclim#1134 to benefit from the run_length algo update + rle = run_length.rle(merged_exceedances, dim="time", index="first") + cropped_rle = rle.where(rle >= min_spell_length, other=0) + if resample_freq.indexer: + cropped_rle = select_time(cropped_rle, **resample_freq.indexer) + result = cropped_rle.resample(time=resample_freq.pandas_freq).max(dim="time") + return to_agg_units(result, climate_vars[0].studied_data, "count") + + +def excess( + climate_vars: list[ClimateVariable], + resample_freq: Frequency, + **kwargs, # noqa +) -> DataArray: + op, study, threshold = _get_single_var(climate_vars) + if threshold.is_doy_per_threshold: + thresh = resample_doy(threshold.value, study) + else: + thresh = threshold.value + res = ( + (study - thresh) + .clip(min=0) + .resample(time=resample_freq.pandas_freq) + .sum(dim="time") + ) + return to_agg_units(res, study, "delta_prod") + + +def deficit( + climate_vars: list[ClimateVariable], + resample_freq: Frequency, + **kwargs, # noqa +) -> DataArray: + op, study, threshold = _get_single_var(climate_vars) + if threshold.is_doy_per_threshold: + thresh = resample_doy(threshold.value, study) + else: + thresh = threshold.value + res = ( + (thresh - study) + .clip(min=0) + .resample(time=resample_freq.pandas_freq) + .sum(dim="time") + ) + return to_agg_units(res, study, "delta_prod") + + +def fraction_of_total( + climate_vars: list[ClimateVariable], + resample_freq: Frequency, + to_percent: bool, + **kwargs, # noqa +) -> DataArray: + op, study, threshold = _get_single_var(climate_vars) + if threshold.threshold_min_value: + total = ( + study.where(op(study, threshold.threshold_min_value.value)) + .resample(time=resample_freq.pandas_freq) + .sum(dim="time") + ) + else: + total = study.resample(time=resample_freq.pandas_freq).sum(dim="time") + exceedance = _compute_exceedance( + operator=op, + study=study, + threshold=threshold.value, + freq=resample_freq.pandas_freq, + bootstrap=_must_run_bootstrap(study, threshold), + is_doy_per=threshold.is_doy_per_threshold, + ).squeeze() + over = ( + study.where(exceedance, 0) + .resample(time=resample_freq.pandas_freq) + .sum(dim="time") + ) + res = over / total + if to_percent: + res = res * 100 + res.attrs[UNITS_ATTRIBUTE_KEY] = "%" + else: + res.attrs[UNITS_ATTRIBUTE_KEY] = PART_OF_A_WHOLE_UNIT + return res + + +def maximum( + climate_vars: list[ClimateVariable], + resample_freq: Frequency, + date_event: bool, + **kwargs, # noqa +) -> DataArray: + return _run_simple_reducer( + climate_vars=climate_vars, + resample_freq=resample_freq, + reducer_op=DataArrayResample.max, + date_event=date_event, + ) + + +def minimum( + climate_vars: list[ClimateVariable], + resample_freq: Frequency, + date_event: bool, + **kwargs, # noqa +) -> DataArray: + return _run_simple_reducer( + climate_vars=climate_vars, + resample_freq=resample_freq, + reducer_op=DataArrayResample.min, + 
date_event=date_event, + ) + + +def average( + climate_vars: list[ClimateVariable], + resample_freq: Frequency, + **kwargs, # noqa +) -> DataArray: + return _run_simple_reducer( + climate_vars=climate_vars, + resample_freq=resample_freq, + reducer_op=DataArrayResample.mean, + date_event=False, + ) + + +def sum( + climate_vars: list[ClimateVariable], + resample_freq: Frequency, + **kwargs, # noqa +) -> DataArray: + return _run_simple_reducer( + climate_vars=climate_vars, + resample_freq=resample_freq, + reducer_op=DataArrayResample.sum, + date_event=False, + ) + + +def standard_deviation( + climate_vars: list[ClimateVariable], + resample_freq: Frequency, + **kwargs, # noqa +) -> DataArray: + return _run_simple_reducer( + climate_vars, resample_freq, DataArrayResample.std, date_event=False + ) + + +def max_of_rolling_sum( + climate_vars: list[ClimateVariable], + resample_freq: Frequency, + rolling_window_width: int, + date_event: bool, + source_freq_delta: timedelta, + **kwargs, # noqa +): + return _run_rolling_reducer( + climate_vars=climate_vars, + resample_freq=resample_freq, + rolling_window_width=rolling_window_width, + rolling_op=DataArrayRolling.sum, + resampled_op=DataArrayResample.max, + date_event=date_event, + source_freq_delta=source_freq_delta, + ) + + +def min_of_rolling_sum( + climate_vars: list[ClimateVariable], + resample_freq: Frequency, + rolling_window_width: int, + date_event: bool, + source_freq_delta: timedelta, + **kwargs, # noqa +): + return _run_rolling_reducer( + climate_vars=climate_vars, + resample_freq=resample_freq, + rolling_window_width=rolling_window_width, + rolling_op=DataArrayRolling.sum, + resampled_op=DataArrayResample.min, + date_event=date_event, + source_freq_delta=source_freq_delta, + ) + + +def min_of_rolling_average( + climate_vars: list[ClimateVariable], + resample_freq: Frequency, + rolling_window_width: int, + date_event: bool, + source_freq_delta: timedelta, + **kwargs, # noqa +): + return _run_rolling_reducer( + climate_vars=climate_vars, + resample_freq=resample_freq, + rolling_window_width=rolling_window_width, + rolling_op=DataArrayRolling.mean, + resampled_op=DataArrayResample.min, + date_event=date_event, + source_freq_delta=source_freq_delta, + ) + + +def max_of_rolling_average( + climate_vars: list[ClimateVariable], + resample_freq: Frequency, + rolling_window_width: int, + date_event: bool, + source_freq_delta: timedelta, + **kwargs, # noqa +): + return _run_rolling_reducer( + climate_vars=climate_vars, + resample_freq=resample_freq, + rolling_window_width=rolling_window_width, + rolling_op=DataArrayRolling.mean, + resampled_op=DataArrayResample.max, + date_event=date_event, + source_freq_delta=source_freq_delta, + ) + + +def mean_of_difference( + climate_vars: list[ClimateVariable], + resample_freq: Frequency, + **kwargs, # noqa +): + study, ref = _get_couple_of_var(climate_vars, "mean_of_difference") + mean_of_diff = (study - ref).resample(time=resample_freq.pandas_freq).mean() + mean_of_diff.attrs["units"] = study.attrs["units"] + return mean_of_diff + + +def difference_of_extremes( + climate_vars: list[ClimateVariable], + resample_freq: Frequency, + **kwargs, # noqa +): + study, ref = _get_couple_of_var(climate_vars, "difference_of_extremes") + max_study = study.resample(time=resample_freq.pandas_freq).max() + min_ref = ref.resample(time=resample_freq.pandas_freq).min() + diff_of_extremes = max_study - min_ref + diff_of_extremes.attrs["units"] = study.attrs["units"] + return diff_of_extremes + + +def 
mean_of_absolute_one_time_step_difference( + climate_vars: list[ClimateVariable], + resample_freq: Frequency, + **kwargs, # noqa +) -> DataArray: + """ + Generification of ECAD's vDTR index. + + Parameters + ---------- + climate_vars : List[ClimateVariable] + The two climate variables necessary to compute the indicator. + resample_freq : Frequency + Expected frequency of the output. + kwargs : dict + Ignored keyword arguments (for compatibility). + + Returns + ------- + DataArray + mean_of_absolute_one_time_step_difference as a xarray.DataArray + """ + study, ref = _get_couple_of_var( + climate_vars, "mean_of_absolute_one_time_step_difference" + ) + one_time_step_diff = (study - ref).diff(dim="time") + res = abs(one_time_step_diff).resample(time=resample_freq.pandas_freq).mean() + res.attrs["units"] = study.attrs["units"] + return res + + +def difference_of_means( + climate_vars: list[ClimateVariable], + to_percent: bool, + resample_freq: Frequency, + sampling_method: str, + **kwargs, # noqa +): + study, ref = _get_couple_of_var(climate_vars, "difference_of_means") + if sampling_method == GROUP_BY_METHOD: + if resample_freq.group_by_key == RUN_INDEXER: + mean_study = study.mean(dim="time") + mean_ref = ref.mean(dim="time") + else: + mean_study = study.groupby(resample_freq.group_by_key).mean() + mean_ref = ref.groupby(resample_freq.group_by_key).mean() + elif sampling_method == RESAMPLE_METHOD: + mean_study = study.resample(time=resample_freq.pandas_freq).mean() + mean_ref = ref.resample(time=resample_freq.pandas_freq).mean() + elif sampling_method == GROUP_BY_REF_AND_RESAMPLE_STUDY_METHOD: + if ( + resample_freq.group_by_key == RUN_INDEXER + or resample_freq == FrequencyRegistry.YEAR + ): + mean_study = study.resample(time=resample_freq.pandas_freq).mean() + # data is already filtered with only the indexed values. + # Thus there is only one "group". + mean_ref = ref.mean(dim="time") + else: + return diff_of_means_of_resampled_x_by_groupedby_y( + resample_freq, to_percent, study, ref + ) + else: + raise NotImplementedError(f"Unknown sampling_method: '{sampling_method}'.") + diff_of_means = mean_study - mean_ref + if to_percent: + diff_of_means = diff_of_means / mean_ref * 100 + diff_of_means.attrs["units"] = "%" + else: + diff_of_means.attrs["units"] = study.attrs["units"] + return diff_of_means + + +def diff_of_means_of_resampled_x_by_groupedby_y( + resample_freq: Frequency, to_percent: bool, study: DataArray, ref: DataArray +) -> DataArray: + mean_ref = ref.groupby(resample_freq.group_by_key).mean() + acc = [] + if resample_freq == FrequencyRegistry.MONTH: + key = "month" + dt_selector = lambda x: x.time.dt.month # noqa lamdab assigned + elif resample_freq == FrequencyRegistry.DAY: + key = "dayofyear" + dt_selector = lambda x: x.time.dt.dayofyear # noqa lamdab assigned + else: + raise NotImplementedError( + f"Can't use {GROUP_BY_REF_AND_RESAMPLE_STUDY_METHOD}" + f" with the frequency {resample_freq.long_name}." 
+ ) + for label, sample in study.resample(time=resample_freq.pandas_freq): + sample_mean = sample.mean(dim="time") + ref_group_mean = mean_ref.sel({key: dt_selector(sample).values[0]}) + sample_diff_of_means = sample_mean - ref_group_mean + if to_percent: + sample_diff_of_means = sample_diff_of_means / ref_group_mean * 100 + del sample_diff_of_means[key] + sample_diff_of_means = sample_diff_of_means.expand_dims(time=[label]) + acc.append(sample_diff_of_means) + diff_of_means = xr.concat(acc, dim="time") + if to_percent: + diff_of_means.attrs["units"] = "%" + else: + diff_of_means.attrs["units"] = study.attrs["units"] + return diff_of_means + + +def check_single_var(climate_vars: list[ClimateVariable], indicator: GenericIndicator): + if len(climate_vars) > 1: + raise InvalidIcclimArgumentError( + f"{indicator.name} can only be computed on a" f" single variable." + ) + + +def check_couple_of_vars( + climate_vars: list[ClimateVariable], indicator: GenericIndicator +): + if len(climate_vars) != 2: + raise InvalidIcclimArgumentError( + f"{indicator.name} can only be computed on two variables sharing the same" + f" unit (e.g. 2 temperatures). Either provide a `base_period_time_range` to" + f" create a reference variable or directly provide a secondary variable" + f" with `in_files` or `var_name`." + ) + + +class GenericIndicatorRegistry(Registry): + def __init__(self): + super().__init__() + + _item_class = GenericIndicator + + CountOccurrences = GenericIndicator("count_occurrences", count_occurrences) + MaxConsecutiveOccurrence = GenericIndicator( + "max_consecutive_occurrence", + max_consecutive_occurrence, + select_time_before_computation=False, + ) + SumOfSpellLengths = GenericIndicator( + "sum_of_spell_lengths", + sum_of_spell_lengths, + select_time_before_computation=False, + ) + Excess = GenericIndicator("excess", excess, check_vars=check_single_var) + Deficit = GenericIndicator("deficit", deficit, check_vars=check_single_var) + FractionOfTotal = GenericIndicator( + "fraction_of_total", fraction_of_total, check_vars=check_single_var + ) + Maximum = GenericIndicator("maximum", maximum) + Minimum = GenericIndicator("minimum", minimum) + Average = GenericIndicator("average", average) + Sum = GenericIndicator("sum", sum) + StandardDeviation = GenericIndicator("standard_deviation", standard_deviation) + MaxOfRollingSum = GenericIndicator( + "max_of_rolling_sum", max_of_rolling_sum, check_vars=check_single_var + ) + MinOfRollingSum = GenericIndicator( + "min_of_rolling_sum", min_of_rolling_sum, check_vars=check_single_var + ) + MaxOfRollingAverage = GenericIndicator( + "max_of_rolling_average", max_of_rolling_average, check_vars=check_single_var + ) + MinOfRollingAverage = GenericIndicator( + "min_of_rolling_average", min_of_rolling_average, check_vars=check_single_var + ) + MeanOfDifference = GenericIndicator( + "mean_of_difference", mean_of_difference, check_vars=check_couple_of_vars + ) + DifferenceOfExtremes = GenericIndicator( + "difference_of_extremes", + difference_of_extremes, + check_vars=check_couple_of_vars, + ) + MeanOfAbsoluteOneTimeStepDifference = GenericIndicator( + "mean_of_absolute_one_time_step_difference", + mean_of_absolute_one_time_step_difference, + check_vars=check_couple_of_vars, + ) + DifferenceOfMeans = GenericIndicator( + "difference_of_means", + difference_of_means, + check_vars=check_couple_of_vars, + sampling_methods=[ + RESAMPLE_METHOD, + GROUP_BY_METHOD, + GROUP_BY_REF_AND_RESAMPLE_STUDY_METHOD, + ], + ) + + +@percentile_bootstrap +def _compute_exceedance( + 
study: DataArray, + threshold: DataArray | PercentileDataArray, + operator: Operator, + freq: str, # noqa @percentile_bootstrap (don't rename it, it breaks bootstrap) + bootstrap: bool, # noqa @percentile_bootstrap + is_doy_per: bool, +) -> DataArray: + if is_doy_per: + threshold = resample_doy(threshold, study) + res = operator(study, threshold) + if bootstrap: + res.attrs[REFERENCE_PERIOD_ID] = build_climatology_bounds(study) + return res + + +def _get_couple_of_var( + climate_vars: list[ClimateVariable], indicator: str +) -> tuple[DataArray, DataArray]: + if climate_vars[0].threshold or climate_vars[1].threshold: + raise InvalidIcclimArgumentError( + f"{indicator} cannot be computed with thresholds." + ) + study = climate_vars[0].studied_data + ref = climate_vars[1].studied_data + study = convert_units_to(study, ref) + return study, ref + + +def _run_rolling_reducer( + climate_vars: list[ClimateVariable], + resample_freq: Frequency, + rolling_window_width: int, + rolling_op: Callable[[DataArrayRolling], DataArray], # sum | mean + resampled_op: Callable[..., DataArray], # max | min + date_event: bool, + source_freq_delta: timedelta, +) -> DataArray: + thresh_operator, study, threshold = _get_single_var(climate_vars) + if threshold: + exceedance = _compute_exceedance( + operator=thresh_operator, + study=study, + freq=resample_freq.pandas_freq, + threshold=threshold.value, + bootstrap=_must_run_bootstrap(study, threshold), + is_doy_per=threshold.is_doy_per_threshold, + ).squeeze() + study = study.where(exceedance) + study = rolling_op(study.rolling(time=rolling_window_width)) + study = study.resample(time=resample_freq.pandas_freq) + if date_event: + return _reduce_with_date_event( + resampled=study, + reducer=resampled_op, + window=rolling_window_width, + source_delta=source_freq_delta, + ) + else: + return resampled_op(study, dim="time") # type:ignore + + +def _run_simple_reducer( + climate_vars: list[ClimateVariable], + resample_freq: Frequency, + reducer_op: Callable[..., DataArray], + date_event: bool, +): + thresh_op, study, threshold = _get_single_var(climate_vars) + if threshold is not None: + exceedance = _compute_exceedance( + operator=thresh_op, + study=study, + freq=resample_freq.pandas_freq, + threshold=threshold.value, + bootstrap=_must_run_bootstrap(study, threshold), + is_doy_per=threshold.is_doy_per_threshold, + ).squeeze() + filtered_study = study.where(exceedance) + else: + filtered_study = study + if date_event: + return _reduce_with_date_event( + resampled=filtered_study.resample(time=resample_freq.pandas_freq), + reducer=reducer_op, + ) + else: + return reducer_op( + filtered_study.resample(time=resample_freq.pandas_freq), dim="time" + ) + + +def _compute_exceedances( + climate_vars: list[ClimateVariable], resample_freq: str, logical_link: LogicalLink +) -> DataArray: + exceedances = [ + _compute_exceedance( + operator=climate_var.threshold.operator, + study=climate_var.studied_data, + threshold=climate_var.threshold.value, + freq=resample_freq, + bootstrap=_must_run_bootstrap( + climate_var.studied_data, climate_var.threshold + ), + is_doy_per=climate_var.threshold.is_doy_per_threshold, + ).squeeze() + for climate_var in climate_vars + ] + return logical_link(exceedances) + + +def _get_single_var( + climate_vars: list[ClimateVariable], +) -> tuple[Operator | None, DataArray, Threshold | None]: + if climate_vars[0].threshold: + return ( + climate_vars[0].threshold.operator, + climate_vars[0].studied_data, + climate_vars[0].threshold, + ) + else: + return None,
climate_vars[0].studied_data, None + + +def _must_run_bootstrap(da: DataArray, threshold: Threshold | None) -> bool: + """Avoid bootstrapping when the overlap between the reference period and the + studied period covers either a single year, no year at all, or every year. + """ + # TODO: Don't run bootstrap when not on extreme percentile + # (below 20? 10? or above 80? 90?) + if threshold is None or not threshold.is_doy_per_threshold: + return False + reference = threshold.value + study_years = np.unique(da.indexes.get("time").year) + overlapping_years = np.unique( + da.sel(time=_get_ref_period_slice(reference)).indexes.get("time").year + ) + return 1 < len(overlapping_years) < len(study_years) + + +def _get_ref_period_slice(da: DataArray) -> slice: + if (bds := da.attrs.get("climatology_bounds", None)) is not None: + return slice(*bds) + time_length = len(da.time) + return slice(*da.time[0 :: time_length - 1].dt.strftime("%Y-%m-%d").values) + + +def _same_freq_for_all(climate_vars: list[ClimateVariable]) -> bool: + if len(climate_vars) == 1: + return True + freqs = list(map(lambda a: xr.infer_freq(a.studied_data.time), climate_vars)) + return all(map(lambda x: x == freqs[0], freqs[1:])) + + +def _get_inputs_metadata( + climate_vars: list[ClimateVariable], resample_freq: Frequency, indicator_name +) -> list[dict[str, str]]: + return list( + map( + lambda cf_var: cf_var.build_indicator_metadata( + resample_freq, + _must_run_bootstrap(cf_var.studied_data, cf_var.threshold), + indicator_name, + ), + climate_vars, + ) + ) + + +def _reduce_with_date_event( + resampled: DataArrayResample, + reducer: Callable[[DataArrayResample], DataArray], + source_delta: timedelta | None = None, + window: int | None = None, +) -> DataArray: + acc: list[DataArray] = [] + if reducer == DataArrayResample.max: + group_reducer = DataArray.argmax + elif reducer == DataArrayResample.min: + group_reducer = DataArray.argmin + else: + raise NotImplementedError( + f"Can't compute date_event due to unknown reducer:" f" '{reducer}'" + ) + for label, sample in resampled: + reduced_result = sample.isel(time=group_reducer(sample, dim="time")) + if window is not None: + result = _add_date_coords( + original_sample=sample, + result=sample.sum(dim="time"), + start_time=reduced_result.time, + end_time=reduced_result.time + window * source_delta, + label=label, + ) + else: + result = _add_date_coords( + original_sample=sample, + result=sample.sum(dim="time"), + event_date=reduced_result.time, + label=label, + ) + acc.append(result) + return xr.concat(acc, "time") + + +def _count_occurrences_with_date(resampled: DataArrayResample): + acc: list[DataArray] = [] + for label, sample in resampled: + # Fixme probably not safe to compute on huge dataset, + # it should be fixed with + # https://github.com/pydata/xarray/issues/2511 + sample = sample.compute() + first = sample.isel(time=sample.argmax("time")).time + reversed_time = sample.reindex(time=list(reversed(sample.time.values))) + last = reversed_time.isel(time=reversed_time.argmax("time")).time + dated_occurrences = _add_date_coords( + original_sample=sample, + result=sample.sum(dim="time"), + start_time=first, + end_time=last, + label=label, + ) + acc.append(dated_occurrences) + return xr.concat(acc, "time") + + +def _consecutive_occurrences_with_dates( + resampled: DataArrayResample, source_freq_delta: timedelta +): + acc = [] + for label, sample in resampled: + # todo might be unnecessary to replace NaN by 0 with the new rle + # (if the new rle does not generate NaN) + sample = sample.where(~sample.isnull(), 0) +
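# Note: at this point `sample` is expected to be run-length encoded, i.e. each + # value holds the length of the run starting at that time step, so the + # argmax over time below locates the start of the longest run. +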
time_index_of_max_rle = sample.argmax(dim="time") + # fixme: `.compute` is needed until xarray merges this pr: + # https://github.com/pydata/xarray/pull/5873 + time_index_of_max_rle = time_index_of_max_rle.compute() + dated_longest_run = sample[{"time": time_index_of_max_rle}] + start_time = sample.isel( + time=time_index_of_max_rle.where(time_index_of_max_rle > 0, 0) + ).time + end_time = start_time + (dated_longest_run * source_freq_delta) + dated_longest_run = _add_date_coords( + original_sample=sample, + result=dated_longest_run, + start_time=start_time, + end_time=end_time, + label=label, + ) + acc.append(dated_longest_run) + result = xr.concat(acc, "time") + return result + + +def _add_date_coords( + original_sample: DataArray, + result: DataArray, + label: str | np.datetime64, + start_time: DataArray = None, + end_time: DataArray = None, + event_date: DataArray = None, +) -> DataArray: + new_coords = {c: original_sample[c] for c in original_sample.coords if c != "time"} + if event_date is None: + new_coords["event_date_start"] = start_time + new_coords["event_date_end"] = end_time + else: + new_coords["event_date"] = event_date + new_coords["time"] = label + return DataArray(data=result, coords=new_coords) + + +def _is_amount_unit(unit: str) -> bool: + # todo: maybe there is a more generic way to handle that with pint, + # we could try to convert to pint and check if it has a "day-1" in it + # (or a similar "by-time" unit) + return unit in ["cm", "mm", "m"] + + +def _to_percent(da: DataArray, sampling_freq: Frequency) -> DataArray: + if sampling_freq == FrequencyRegistry.MONTH: + da = da / da.time.dt.daysinmonth * 100 + elif sampling_freq == FrequencyRegistry.YEAR: + coef = xr.full_like(da, 1) + leap_years = _is_leap_year(da) + coef[{"time": leap_years}] = 366 + coef[{"time": ~leap_years}] = 365 + da = da / coef + elif sampling_freq == FrequencyRegistry.AMJJAS: + da = da / 183 + elif sampling_freq == FrequencyRegistry.ONDJFM: + coef = xr.full_like(da, 1) + leap_years = _is_leap_year(da) + coef[{"time": leap_years}] = 183 + coef[{"time": ~leap_years}] = 182 + da = da / coef + elif sampling_freq == FrequencyRegistry.DJF: + coef = xr.full_like(da, 1) + leap_years = _is_leap_year(da) + coef[{"time": leap_years}] = 91 + coef[{"time": ~leap_years}] = 90 + da = da / coef + elif sampling_freq in [FrequencyRegistry.MAM, FrequencyRegistry.JJA]: + da = da / 92 + elif sampling_freq == FrequencyRegistry.SON: + da = da / 91 + else: + # TODO improve this for custom resampling + warn( + "For now, '%' unit can only be used when `slice_mode` is one of: " + "{MONTH, YEAR, AMJJAS, ONDJFM, DJF, MAM, JJA, SON}." 
+ ) + return da + da.attrs[UNITS_ATTRIBUTE_KEY] = PART_OF_A_WHOLE_UNIT + return da + + +def _is_leap_year(da: DataArray) -> np.ndarray: + time_index = da.indexes.get("time") + if isinstance(time_index, xr.CFTimeIndex): + return CfCalendarRegistry.lookup(time_index.calendar).is_leap(da.time.dt.year) + else: + return da.time.dt.is_leap_year diff --git a/icclim/generic_indices/generic_templates.py b/icclim/generic_indices/generic_templates.py new file mode 100644 index 00000000..0d860750 --- /dev/null +++ b/icclim/generic_indices/generic_templates.py @@ -0,0 +1,211 @@ +from __future__ import annotations + +from icclim.generic_indices.cf_var_metadata import IndicatorMetadata + +# fmt: off +# flake8: noqa + +COMBINED_VARS_LONG_NAME = ( + "{% for i, climate_var in enumerate(climate_vars) %}" + "{{climate_var.long_name}} is" + " {{climate_var.threshold.long_name}}" + "{% if climate_var.threshold.additional_metadata %}" + " {{climate_var.threshold.additional_metadata}}" + "{% endif %}" + "{% if i != len(climate_vars) - 1 %}" + " and " + "{% endif%}" + "{% endfor %}" + " for each {{output_freq.long_name}}." +) +COMBINED_VARS_STANDARD_NAME = ( + "{% for i, climate_var in enumerate(climate_vars) %}" + "{{climate_var.standard_name}}" + "{% if i != len(climate_vars) - 1 %}" + "_and_" + "{% endif %}" + "{% endfor %}" +) +SINGLE_VAR_LONG_NAME = ( + "{{source_freq.adjective}}" + " {{climate_vars[0].long_name}}" + " related to {{climate_vars[0].threshold.value}}" + " for each {{output_freq.long_name}}." + "{% if climate_vars[0].threshold.additional_metadata %}" + " {{climate_vars[0].threshold.additional_metadata}}" + "{% endif %}" +) +SINGLE_VAR_LONG_NAME_WITH_EXCEEDANCE = ( + "{{source_freq.adjective}}" + " {{climate_vars[0].long_name}}" + "{% if climate_vars[0].threshold %}" + " when {{climate_vars[0].long_name}} is {{climate_vars[0].threshold.long_name}}" + "{% endif %}" + " for each {{output_freq.long_name}}."
+ "{% if climate_vars[0].threshold.additional_metadata %}" + " {{climate_vars[0].threshold.additional_metadata}}" + "{% endif %}" +) + +INDICATORS_TEMPLATES_EN: dict[str, IndicatorMetadata] = { + "count_occurrences": { + "long_name": "Number of {{source_freq.units}}" + f" when {COMBINED_VARS_LONG_NAME}", + "standard_name": "number_of_{{source_freq.units}}_with" + f"_{COMBINED_VARS_STANDARD_NAME}" + "_above_threshold", + "cell_methods": "time: sum over {{source_freq.units}}", + }, + "max_consecutive_occurrence": { + "standard_name": "spell_length_of_{{source_freq.units}}_with" + f"_{COMBINED_VARS_STANDARD_NAME}" + "_above_threshold", + "long_name": "Maximum number of consecutive {{source_freq.units}} when" + f" {COMBINED_VARS_LONG_NAME}", + "cell_methods": "time: maximum over {{source_freq.units}}", + }, + "sum_of_spell_lengths": { + "standard_name": # not CF + "spell_length_of_{{source_freq.units}}_with" + f"_{COMBINED_VARS_STANDARD_NAME}" + "_above_thresholds", + "long_name": "Sum of spell lengths of at least {{min_spell_length}}" + " {{source_freq.units}} when" + f" {COMBINED_VARS_LONG_NAME}", + "cell_methods": "time: sum over {{source_freq.units}}", + }, + "excess": { + "standard_name": "integral_of" + "_{{climate_vars[0].standard_name}}" + "_excess_wrt_time", + "long_name": f"Excess of {SINGLE_VAR_LONG_NAME}", + "cell_methods": "time: difference over {{source_freq.units}}", + }, + "deficit": { + "standard_name": "integral_of_{{climate_vars[0].standard_name}}" + "_deficit_wrt_time", + "long_name": f"Deficit of {SINGLE_VAR_LONG_NAME}", + "cell_methods": "time: difference over {{source_freq.units}}", + }, + "fraction_of_total": { + "standard_name": # not CF + "fraction_of_thresholded_{{climate_vars[0].standard_name}}" + "_on_total", + "long_name": f"Fraction of {SINGLE_VAR_LONG_NAME}", + # not cf + "cell_methods": "time: fraction over {{source_freq.units}}", + }, + "maximum": { + "standard_name": "{{climate_vars[0].standard_name}}", + "long_name": f"Maximum of {SINGLE_VAR_LONG_NAME_WITH_EXCEEDANCE}", + "cell_methods": "time: maximum over {{source_freq.units}}", + }, + "minimum": { + "standard_name":"{{climate_vars[0].standard_name}}", + "long_name": "Minimum of" + f" {SINGLE_VAR_LONG_NAME_WITH_EXCEEDANCE}", + "cell_methods": "time: minimum over {{source_freq.units}}", + }, + "average": { + "standard_name": "{{climate_vars[0].standard_name}}", + "long_name": "Average of" + f" {SINGLE_VAR_LONG_NAME_WITH_EXCEEDANCE}", + "cell_methods": "time: mean over {{source_freq.units}}", + }, + "sum": { + "standard_name": "{{climate_vars[0].standard_name}}", + "long_name": "Sum of" + f" {SINGLE_VAR_LONG_NAME_WITH_EXCEEDANCE}", + "cell_methods": "time: sum over {{source_freq.units}}", + }, + "standard_deviation": { + "standard_name": "{{climate_vars[0].standard_name}}", + "long_name": "Standard deviation of" + f" {SINGLE_VAR_LONG_NAME_WITH_EXCEEDANCE}", + "cell_methods": "time: standard_deviation over {{source_freq.units}}", + }, + "max_of_rolling_sum": { + "standard_name": "{{climate_vars[0].standard_name}}", + "long_name": "Maximum {{rolling_window_width}}" + " {{source_freq.units}} rolling sum of" + f" {SINGLE_VAR_LONG_NAME_WITH_EXCEEDANCE}", + "cell_methods": "time: sum over {{source_freq.units}}", + }, + "min_of_rolling_sum": { + "standard_name": "{{climate_vars[0].standard_name}}", + "long_name": "Minimum {{rolling_window_width}}" + " {{source_freq.units}} rolling sum of" + f" {SINGLE_VAR_LONG_NAME_WITH_EXCEEDANCE}", + "cell_methods": "time: sum over {{source_freq.units}}", + }, + 
"min_of_rolling_average": { + "standard_name": "{{climate_vars[0].standard_name}}", + "long_name": "Minimum {{rolling_window_width}}" + " {{source_freq.units}} rolling average of" + f" {SINGLE_VAR_LONG_NAME_WITH_EXCEEDANCE}", + "cell_methods": "time: mean over {{source_freq.units}}", + }, + "max_of_rolling_average": { + "standard_name": "{{climate_vars[0].standard_name}}", + "long_name": "Maximum {{rolling_window_width}}" + " {{source_freq.units}} rolling average of" + f" {SINGLE_VAR_LONG_NAME_WITH_EXCEEDANCE}", + "cell_methods": "time: mean over {{source_freq.units}}", + }, + "mean_of_difference": { + "standard_name": "range_between_{{climate_vars[0].standard_name}}" + "_and_{{climate_vars[1].standard_name}}", # not CF + "long_name": "Mean of difference between {{climate_vars[0].long_name}}" + " and {{climate_vars[1].long_name}}" + " for each {{output_freq.long_name}}.", + "cell_methods": "time: range within {{source_freq.units}}" + " time: mean over {{source_freq.units}}", + }, + "difference_of_extremes": { + "standard_name": "range_of_extremes_between_{{climate_vars[0].standard_name}}" + "_and_{{climate_vars[1].standard_name}}", # not CF + "long_name": "Difference between" + " maximum of {{source_freq.adjective}}" + " {{climate_vars[0].long_name}}" + " and minimum of {{source_freq.adjective}}" + " {{climate_vars[1].long_name}}" + " for each {{output_freq.long_name}}.", + "cell_methods": "time: range within {{source_freq.units}}" + " time: maximum over {{source_freq.units}}" + " time: minimum over {{source_freq.units}}", + }, + "mean_of_absolute_one_time_step_difference": { + "standard_name": "variability_range_between_{{climate_vars[0].standard_name}}" + "_and_{{climate_vars[1].standard_name}}", # not CF + "long_name": "Average of the absolute {{source_freq.long_name}}" + " to {{source_freq.long_name}} difference" + " of the {{source_freq.adjective}} variation between" + " {{climate_vars[0].long_name}}" + " and {{climate_vars[1].long_name}}" + " for each {{output_freq.long_name}}.", + "cell_methods": "time: range within {{source_freq.units}}" + " time: difference over {{source_freq.units}}" + " time: mean over {{source_freq.units}}", + }, + "difference_of_means": { + "standard_name": "{{climate_vars[0].standard_name}}" + "{% if not is_compared_to_reference %}" + "_to_{{climate_vars[1].standard_name}}" + "{% endif%}" + "_anomaly", # not CF + "long_name": "{{output_freq.adjective}} difference between the" + " averaged {{source_freq.adjective}}" + " {{climate_vars[0].long_name}}" + " and" + "{% if is_compared_to_reference %}" + " its averaged {{source_freq.adjective}} values for the" + " {{reference_period}} period." 
+ "{% else %}" + " the {{output_freq.adjective}}" + " averaged {{source_freq.adjective}}" + " {{climate_vars[1].long_name}}" + "{% endif%}", + "cell_methods": "time: mean over {{source_freq.units}}" + " time: difference over {{source_freq.units}}", + }, +} diff --git a/icclim/icclim_logger.py b/icclim/icclim_logger.py index d8affd0d..1dc01920 100644 --- a/icclim/icclim_logger.py +++ b/icclim/icclim_logger.py @@ -1,30 +1,27 @@ from __future__ import annotations +import dataclasses import logging import time -from enum import Enum -from icclim.icclim_exceptions import InvalidIcclimArgumentError +from icclim.models.registry import Registry -class Verbosity(Enum): - LOW = ("LOW", "INFO") - HIGH = ("HIGH", "INFO") - SILENT = ("SILENT", "ERROR") +@dataclasses.dataclass +class Verbosity: + verbosity_level: str + log_level: str - def __init__(self, icc_verbosity: str, log_level: str): - self.icc_verbosity = icc_verbosity - self.log_level = log_level + +class VerbosityRegistry(Registry): + _item_class = Verbosity + LOW = Verbosity("LOW", "INFO") + HIGH = Verbosity("HIGH", "INFO") + SILENT = Verbosity("SILENT", "ERROR") @staticmethod - def lookup(query: str) -> Verbosity: - for v in Verbosity: - if query.upper() == v.name: - return v - raise InvalidIcclimArgumentError( - f"Unrecognized log verbosity {query}. " - f"Use one of {[v.name for v in Verbosity]}" - ) + def get_item_aliases(item: Verbosity) -> list[str]: + return [item.verbosity_level.upper()] class IcclimLogger: @@ -33,10 +30,10 @@ class IcclimLogger: """ __instance = None - verbosity: Verbosity = Verbosity.LOW + verbosity: Verbosity = VerbosityRegistry.LOW @staticmethod - def get_instance(verbosity: Verbosity = Verbosity.LOW): + def get_instance(verbosity: Verbosity = VerbosityRegistry.LOW): if IcclimLogger.__instance is None: IcclimLogger(verbosity) return IcclimLogger.__instance @@ -55,7 +52,7 @@ def __init__(self, verbosity: Verbosity): def set_verbosity(self, verbosity: str | Verbosity): if isinstance(verbosity, str): - verbosity = Verbosity.lookup(verbosity) + verbosity = VerbosityRegistry.lookup(verbosity) self.verbosity = verbosity logging.root.setLevel(verbosity.log_level) @@ -64,9 +61,9 @@ def start_message(self): # flake8: noqa time_now = time.asctime(time.gmtime()) - if self.verbosity == Verbosity.SILENT: + if self.verbosity == VerbosityRegistry.SILENT: return - if self.verbosity == Verbosity.LOW: + if self.verbosity == VerbosityRegistry.LOW: logging.info(f"--- icclim {icclim_version}") logging.info("--- BEGIN EXECUTION") return @@ -104,9 +101,9 @@ def ending_message(self, time_cpu): # flake8: noqa time_now = time.asctime(time.gmtime()) - if self.verbosity == Verbosity.SILENT: + if self.verbosity == VerbosityRegistry.SILENT: return - if self.verbosity == Verbosity.LOW: + if self.verbosity == VerbosityRegistry.LOW: logging.info(f"--- icclim {icclim_version}") logging.info("--- CPU SECS = %-10.3f", time_cpu) logging.info("--- END EXECUTION") diff --git a/icclim/icclim_types.py b/icclim/icclim_types.py new file mode 100644 index 00000000..dd0531bf --- /dev/null +++ b/icclim/icclim_types.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +from typing import Dict, List, Literal, Sequence, Tuple, Union + +from xarray import DataArray, Dataset + +InFileBaseType = Union[str, List[str], Dataset, DataArray] +ThresholdedDict = Dict[str, Union[Dict]] # Dict === InFileDictionary +InFileLike = Union[ThresholdedDict, InFileBaseType, Dict[str, InFileBaseType]] + +FrequencyLike = Union[str, List[Union[str, Tuple, int]], Tuple[str, 
Union[List, Tuple]]] +# MonthsIndexer format: [12,1,2,3] +MonthsIndexer = Dict[Literal["month"], Sequence[int]] +# DatesIndexer format: ("01-25", "02-28") +DatesIndexer = Dict[Literal["date_bounds"], Tuple[str, str]] +Indexer = Union[MonthsIndexer, DatesIndexer] + +SamplingMethodLike = Literal["groupby", "resample", "groupby_ref_and_resample_study"] diff --git a/icclim/main.py b/icclim/main.py index 7fb9eb20..f87d47c2 100644 --- a/icclim/main.py +++ b/icclim/main.py @@ -2,15 +2,15 @@ # Copyright CERFACS (http://cerfacs.fr/) # Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -Main module of icclim. +Main entry point of icclim. +This module exposes the index API endpoint along with a few other functions. """ from __future__ import annotations -import copy -import logging import time from datetime import datetime -from typing import Callable, Literal +from functools import partial +from typing import Callable, Literal, Sequence from warnings import warn import xarray as xr @@ -18,35 +18,54 @@ from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset -from icclim.ecad.ecad_functions import IndexConfig -from icclim.ecad.ecad_indices import EcadIndex, get_season_excluded_indices +from icclim.ecad.ecad_indices import EcadIndexRegistry +from icclim.generic_indices.generic_indicators import ( + GenericIndicator, + GenericIndicatorRegistry, + Indicator, +) from icclim.icclim_exceptions import InvalidIcclimArgumentError -from icclim.icclim_logger import IcclimLogger, Verbosity -from icclim.models.climate_index import ClimateIndex -from icclim.models.constants import ICCLIM_VERSION, QUANTILE_BASED -from icclim.models.frequency import Frequency, SliceMode -from icclim.models.index_group import IndexGroup -from icclim.models.netcdf_version import NetcdfVersion -from icclim.models.quantile_interpolation import QuantileInterpolation -from icclim.models.user_index_config import UserIndexConfig -from icclim.models.user_index_dict import UserIndexDict -from icclim.pre_processing.input_parsing import ( - InFileType, - build_cf_variables, - guess_var_names, - read_dataset, - update_to_standard_coords, +from icclim.icclim_logger import IcclimLogger, Verbosity, VerbosityRegistry +from icclim.icclim_types import InFileLike, SamplingMethodLike +from icclim.models.climate_variable import ( + ClimateVariable, + build_climate_vars, + must_add_reference_var, + to_dictionary, +) +from icclim.models.constants import ( + ICCLIM_VERSION, + PERCENTILE_THRESHOLD_STAMP, + RESAMPLE_METHOD, + UNITS_ATTRIBUTE_KEY, + USER_INDEX_PRECIPITATION_STAMP, + USER_INDEX_TEMPERATURE_STAMP, ) -from icclim.user_indices.calc_operation import CalcOperation, compute_user_index +from icclim.models.frequency import Frequency, FrequencyLike, FrequencyRegistry +from icclim.models.index_config import IndexConfig +from icclim.models.index_group import IndexGroup, IndexGroupRegistry +from icclim.models.logical_link import LogicalLink, LogicalLinkRegistry +from icclim.models.netcdf_version import NetcdfVersion, NetcdfVersionRegistry +from icclim.models.operator import OperatorRegistry +from icclim.models.quantile_interpolation import ( + QuantileInterpolation, + QuantileInterpolationRegistry, +) +from icclim.models.standard_index import StandardIndex +from icclim.models.threshold import Threshold +from icclim.models.user_index_dict import UserIndexDict +from icclim.user_indices.calc_operation import CalcOperationRegistry +from icclim.utils import read_date -log: IcclimLogger =
IcclimLogger.get_instance(Verbosity.LOW) +log: IcclimLogger = IcclimLogger.get_instance(VerbosityRegistry.LOW) HISTORY_CF_KEY = "history" SOURCE_CF_KEY = "source" +ICCLIM_REFERENCE = "icclim" def indices( - index_group: Literal["all"] | str | IndexGroup | list[str], + index_group: Literal["all"] | str | IndexGroup | Sequence[str], ignore_error: bool = False, **kwargs, ) -> Dataset: @@ -63,6 +82,8 @@ The value "all" can also be used to compute every indices. Note that the input given by ``in_files`` must include all the necessary variables to compute the indices of this group. + ignore_error: bool + When True, ignore indices that fail to compute. kwargs : Dict ``icclim.index`` keyword arguments. @@ -76,15 +97,15 @@ file, which will contain all the index results of this group. """ - if isinstance(index_group, list): - indices = [EcadIndex.lookup(i) for i in index_group] - elif index_group == IndexGroup.WILD_CARD_GROUP or ( + if isinstance(index_group, (tuple, list)): + indices = [EcadIndexRegistry.lookup(i) for i in index_group] + elif index_group == IndexGroupRegistry.WILD_CARD_GROUP or ( isinstance(index_group, str) - and index_group.lower() == IndexGroup.WILD_CARD_GROUP.value + and index_group.lower() == IndexGroupRegistry.WILD_CARD_GROUP.name ): - indices = iter(EcadIndex) + indices = EcadIndexRegistry.values() else: - indices = IndexGroup.lookup(index_group).get_indices() + indices = IndexGroupRegistry.lookup(index_group).get_indices() out = None if "out_file" in kwargs.keys(): out = kwargs["out_file"] @@ -94,23 +115,33 @@ kwargs["index_name"] = i.short_name if ignore_error: try: - acc.append(index(**kwargs)) + res = index(**kwargs) + if "percentiles" in res.coords: + res = res.rename({"percentiles": i.short_name + "_percentiles"}) + if "thresholds" in res.coords: + res = res.rename({"thresholds": i.short_name + "_thresholds"}) + acc.append(res) except Exception: warn(f"Could not compute {i.short_name}.") else: - acc.append(index(**kwargs)) + res = index(**kwargs) + if "percentiles" in res.coords: + res = res.rename({"percentiles": i.short_name + "_percentiles"}) + if "thresholds" in res.coords: + res = res.rename({"thresholds": i.short_name + "_thresholds"}) + acc.append(res) ds: Dataset = xr.merge(acc) if out is not None: _write_output_file( result_ds=ds, input_time_encoding=ds.time.encoding, - netcdf_version=kwargs.get("netcdf_version", NetcdfVersion.NETCDF4), + netcdf_version=kwargs.get("netcdf_version", NetcdfVersionRegistry.NETCDF4), file_path=out, ) return ds -def indice(*args, **kwargs): +def indice(*args, **kwargs) -> Dataset: """ Deprecated proxy for `icclim.index` function. To be deleted in a futur version.
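For illustration, a minimal sketch of how the reworked entry point can be called once this patch is applied. The file name is a placeholder, "count_occurrences" is one of the GenericIndicatorRegistry entries added above, and the threshold string format (operator, value, unit) is assumed from the build_threshold/read_threshold helpers further below:

import icclim

# Hypothetical example: yearly count of days with tasmax above 25 degC.
# "tasmax.nc" is a placeholder path; the threshold string syntax is assumed.
summer_days = icclim.index(
    in_files="tasmax.nc",
    index_name="count_occurrences",
    threshold="> 25 degree_Celsius",
    slice_mode="year",
)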
@@ -120,29 +151,34 @@ def indice(*args, **kwargs): def index( - in_files: InFileType, + in_files: InFileLike, index_name: str | None = None, # optional when computing user_indices - var_name: str | list[str] | None = None, - slice_mode: SliceMode = Frequency.YEAR, - time_range: list[datetime] | list[str] | tuple[str, str] | None = None, + var_name: str | Sequence[str] | None = None, + slice_mode: FrequencyLike | Frequency = "year", + time_range: Sequence[datetime | str] | None = None, out_file: str | None = None, - threshold: float | list[float] | None = None, + threshold: str | Threshold | Sequence[str] | Sequence[Threshold] = None, callback: Callable[[int], None] = log.callback, callback_percentage_start_value: int = 0, callback_percentage_total: int = 100, - base_period_time_range: list[datetime] | list[str] | tuple[str, str] | None = None, - window_width: int = 5, + base_period_time_range: Sequence[datetime] | Sequence[str] | None = None, + doy_window_width: int = 5, only_leap_years: bool = False, ignore_Feb29th: bool = False, - interpolation: ( - str | QuantileInterpolation | None - ) = QuantileInterpolation.MEDIAN_UNBIASED, + interpolation: str | QuantileInterpolation = "median_unbiased", out_unit: str | None = None, - netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, + netcdf_version: str | NetcdfVersion = "NETCDF4", user_index: UserIndexDict | None = None, - save_percentile: bool = False, - logs_verbosity: Verbosity | str = Verbosity.LOW, - # deprecated parameters + save_thresholds: bool = False, + logs_verbosity: Verbosity | str = "LOW", + date_event: bool = False, + min_spell_length: int | None = 6, + rolling_window_width: int | None = 5, + sampling_method: SamplingMethodLike = RESAMPLE_METHOD, + *, + # deprecated params are kwargs only + window_width: int | None = None, + save_percentile: bool | None = None, indice_name: str = None, user_indice: UserIndexDict = None, transfer_limit_Mbytes: float = None, @@ -152,19 +188,19 @@ def index( Parameters ---------- - in_files : str | list[str] | Dataset | DataArray | InputDictionary, + in_files: str | list[str] | Dataset | DataArray | InputDictionary, Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs, or path to zarr store, or xarray.Dataset or xarray.DataArray. - index_name : str + index_name: str Climate index name. For ECA&D index, case insensitive name used to lookup the index. For user index, it's the name of the output variable. - var_name : str | list[str] | None + var_name: str | list[str] | None ``optional`` Target variable name to process corresponding to ``in_files``. If None (default) on ECA&D index, the variable is guessed based on the climate index wanted. Mandatory for a user index. - slice_mode : SliceMode + slice_mode: SliceMode Type of temporal aggregation: The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON", "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}`` @@ -173,131 +209,231 @@ def index( ``("season", ("19 july", "14 august"))``. Default is "year". See :ref:`slice_mode` for details. - time_range : list[datetime ] | list[str] | tuple[str, str] | None + time_range: list[datetime ] | list[str] | tuple[str, str] | None ``optional`` Temporal range: upper and lower bounds for temporal subsetting. If ``None``, whole period of input files will be processed. The dates can either be given as instance of datetime.datetime or as string values. For strings, many format are accepted. Default is ``None``. 
- out_file : str | None + out_file: str | None Output NetCDF file name (default: "icclim_out.nc" in the current directory). Default is "icclim_out.nc". If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored. Use the function returned value instead to retrieve the computed value. If ``out_file`` already exists, icclim will overwrite it! - threshold : float | list[float] | None + threshold: float | list[float] | None ``optional`` User defined threshold for certain indices. Default depend on the index, see their individual definition. When a list of threshold is provided, the index will be computed for each thresholds. - transfer_limit_Mbytes : float + transfer_limit_Mbytes: float Deprecated, does not have any effect. - callback : Callable[[int], None] + callback: Callable[[int], None] ``optional`` Progress bar printing. If ``None``, progress bar will not be printed. - callback_percentage_start_value : int + callback_percentage_start_value: int ``optional`` Initial value of percentage of the progress bar (default: 0). - callback_percentage_total : int + callback_percentage_total: int ``optional`` Total percentage value (default: 100). - base_period_time_range : list[datetime ] | list[str] | tuple[str, str] | None - ``optional`` Temporal range of the reference period on which percentiles are - computed. + base_period_time_range: list[datetime ] | list[str] | tuple[str, str] | None + ``optional`` Temporal range of the reference period. + The dates can either be given as instance of datetime.datetime or as string + values. + It is used either: + #. to compute percentiles if threshold is filled. When missing, the studied period is used to compute percentiles. The study period is either the dataset filtered by `time_range` or the whole - dataset if `time_range` is None. - On temperature based indices relying on percentiles (TX90p, WSDI...), the + dataset if `time_range` is missing. + For day of year percentiles (doy_per) on extreme percentiles, the overlapping period between `base_period_time_range` and the study period is bootstrapped. - On indices not relying on percentiles, this parameter is ignored. - The dates can either be given as instance of datetime.datetime or as string - values. - For strings, many format are accepted. - window_width : int - ``optional`` User defined window width for related indices (default: 5). - Ignored for non related indices. - only_leap_years : bool + #. to compute a reference period for indices such as difference_of_means + (a.k.a anomaly) if a single variable is given in input. + doy_window_width: int + ``optional`` Window width used to aggregate day of year values when computing + day of year percentiles (doy_per). + Default: 5 (5 days). + min_spell_length: int + ``optional`` Minimum spell duration to be taken into account when computing the + sum_of_spell_lengths. + rolling_window_width: int + ``optional`` Window width of the rolling window for indicators such as + `{max_of_rolling_sum, max_of_rolling_average, min_of_rolling_sum, min_of_rolling_average}` # noqa + only_leap_years: bool ``optional`` Option for February 29th (default: False). - ignore_Feb29th : bool + ignore_Feb29th: bool ``optional`` Ignoring or not February 29th (default: False). - interpolation : str | QuantileInterpolation | None + interpolation: str | QuantileInterpolation | None ``optional`` Interpolation method to compute percentile values: - ``{"linear", "hyndman_fan"}`` - Default is "hyndman_fan", a.k.a type 8 or method 8.
+ ``{"linear", "median_unbiased"}`` + Default is "median_unbiased", a.k.a type 8 or method 8. Ignored for non percentile based indices. - out_unit : str | None + out_unit: str | None ``optional`` Output unit for certain indices: "days" or "%" (default: "days"). - netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion + netcdf_version: str | NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - user_index : UserIndexDict + user_index: UserIndexDict ``optional`` A dictionary with parameters for user defined index. See :ref:`Custom indices`. Ignored for ECA&D indices. - save_percentile : bool - ``optional`` True if the percentiles should be saved within the resulting netcdf + save_thresholds: bool + ``optional`` True if the thresholds should be saved within the resulting netcdf file (default: False). - logs_verbosity : str | Verbosity + date_event: bool + When True the date of the event (such as when a maximum is reached) will be + stored in coordinates variables. + **warning** This option may significantly slow down computation. + logs_verbosity: str | Verbosity ``optional`` Configure how verbose icclim is. Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW") - indice_name : str | None + sampling_method: str + Choose whether the output sampling configured in `slice_mode` is a + `groupby` operation or a `resample` operation (as per xarray definition). + Possible values: ``{"groupby", "resample", "groupby_ref_and_resample_study"}`` + (default: "resample") + `groupby_ref_and_resample_study` may only be used when computing the + `difference_of_means` (a.k.a the anomaly). + indice_name: str | None DEPRECATED, use index_name instead. - user_indice : dict | None + user_indice: dict | None DEPRECATED, use user_index instead. + window_width: int + DEPRECATED, use doy_window_width, min_spell_length or rolling_window_width + instead. + save_percentile: bool + DEPRECATED, use save_thresholds instead. """ - _setup(callback, callback_percentage_start_value, logs_verbosity, slice_mode) - index_name, user_index = _handle_deprecated_params( - index_name, indice_name, transfer_limit_Mbytes, user_index, user_indice + _setup(callback, callback_percentage_start_value, logs_verbosity) + ( + index_name, + user_index, + save_thresholds, + doy_window_width, + ) = _handle_deprecated_params( + index_name, + user_index, + save_thresholds, + doy_window_width, + indice_name, + transfer_limit_Mbytes, + user_indice, + save_percentile, + window_width, ) + del indice_name, transfer_limit_Mbytes, user_indice, save_percentile, window_width # -- Choose index to compute - if user_index is None and index_name is None: - raise InvalidIcclimArgumentError( - "No index to compute." - " You must provide either `user_index` to compute a customized index" - " or `index_name` for one of the ECA&D indices." 
+ interpolation = QuantileInterpolationRegistry.lookup(interpolation) + indicator: GenericIndicator + standard_index: StandardIndex | None + logical_link: LogicalLink + coef: float | None + build_configured_threshold = partial( + build_threshold, + doy_window_width=doy_window_width, + base_period_time_range=base_period_time_range, + only_leap_years=only_leap_years, + interpolation=interpolation, + ) + if user_index is not None: + standard_index = None + indicator = read_indicator(user_index) + if threshold is None: + threshold = read_threshold(user_index, build_configured_threshold) + logical_link = read_logical_link(user_index) + coef = read_coef(user_index) + date_event = read_date_event(user_index) + rename = index_name or user_index.get("index_name", None) or "user_index" + output_unit = out_unit + rolling_window_width = user_index.get("window_width", rolling_window_width) + base_period_time_range = user_index.get( + "ref_time_range", base_period_time_range ) - if index_name is not None: - index = EcadIndex.lookup(index_name) + elif index_name is not None: + logical_link = LogicalLinkRegistry.LOGICAL_AND + coef = None + standard_index = EcadIndexRegistry.lookup(index_name, no_error=True) + if standard_index is None: + indicator = GenericIndicatorRegistry.lookup(index_name) + rename = None + output_unit = out_unit + else: + indicator = standard_index.generic_indicator + threshold = standard_index.threshold + rename = standard_index.short_name + output_unit = out_unit or standard_index.output_unit else: - index = None - input_dataset = read_dataset(in_files, index, var_name) - input_dataset, reset_coords_dict = update_to_standard_coords(input_dataset) - sampling_frequency = Frequency.lookup(slice_mode) - cf_vars = build_cf_variables( - var_names=guess_var_names(input_dataset, in_files, index, var_name), - ds=input_dataset, - time_range=time_range, + raise InvalidIcclimArgumentError( + "You must fill either index_name or user_index" + " to compute a climate index." + ) + sampling_frequency = FrequencyRegistry.lookup(slice_mode) + if isinstance(threshold, str): + threshold = build_configured_threshold(threshold) + elif isinstance(threshold, Sequence): + threshold = [build_configured_threshold(t) for t in threshold] + climate_vars_dict = to_dictionary( + in_files=in_files, + var_names=var_name, + threshold=threshold, + standard_index=standard_index, + ) + # We use groupby instead of resample when there is a single variable that must be + # compared to its reference period values.
+ is_compared_to_reference = must_add_reference_var( + climate_vars_dict, base_period_time_range + ) + indicator_name = ( + standard_index.short_name if standard_index is not None else indicator.name + ) + climate_vars = build_climate_vars( + climate_vars_dict=climate_vars_dict, ignore_Feb29th=ignore_Feb29th, - base_period_time_range=base_period_time_range, - only_leap_years=only_leap_years, - freq=sampling_frequency, + time_range=time_range, + base_period=base_period_time_range, + standard_index=standard_index, + is_compared_to_reference=is_compared_to_reference, ) + if base_period_time_range is not None: + reference_period = tuple( + map(lambda t: read_date(t).strftime("%m-%d-%Y"), base_period_time_range) + ) + else: + reference_period = None config = IndexConfig( - save_percentile=save_percentile, + save_thresholds=save_thresholds, frequency=sampling_frequency, - cf_variables=cf_vars, - window_width=window_width, - out_unit=out_unit, - netcdf_version=netcdf_version, + climate_variables=climate_vars, + min_spell_length=min_spell_length, + rolling_window_width=rolling_window_width, + out_unit=output_unit, + netcdf_version=NetcdfVersionRegistry.lookup(netcdf_version), interpolation=interpolation, callback=callback, - index=index, - threshold=threshold, + is_compared_to_reference=is_compared_to_reference, + reference_period=reference_period, + indicator_name=indicator_name, + logical_link=logical_link, + coef=coef, + date_event=date_event, + sampling_method=sampling_method, + ) + result_ds = _compute_standard_climate_index( + climate_index=indicator, + config=config, + initial_history=climate_vars[0].global_metadata["history"], + initial_source=climate_vars[0].global_metadata["source"], + rename=rename, + reference=standard_index.reference + if standard_index is not None + else ICCLIM_REFERENCE, ) - if user_index is not None: - result_ds = _compute_custom_climate_index(config=config, user_index=user_index) - else: - _check_valid_config(index, config) - result_ds = _compute_standard_climate_index( - config=config, - climate_index=index, - initial_history=input_dataset.attrs.get(HISTORY_CF_KEY, None), - initial_source=input_dataset.attrs.get(SOURCE_CF_KEY, None), - ) - if reset_coords_dict: - result_ds = result_ds.rename(reset_coords_dict) if out_file is not None: _write_output_file( - result_ds, input_dataset.time.encoding, config.netcdf_version, out_file + result_ds, + climate_vars[0].global_metadata["time_encoding"], + config.netcdf_version, + out_file, ) callback(callback_percentage_total) log.ending_message(time.process_time()) @@ -306,7 +442,7 @@ def index( def _write_output_file( result_ds: xr.Dataset, - input_time_encoding: dict, + input_time_encoding: dict | None, netcdf_version: NetcdfVersion, file_path: str, ) -> None: @@ -314,21 +450,29 @@ def _write_output_file( if input_time_encoding: time_encoding = { "calendar": input_time_encoding.get("calendar"), - "units": input_time_encoding.get("units"), + UNITS_ATTRIBUTE_KEY: input_time_encoding.get(UNITS_ATTRIBUTE_KEY), "dtype": input_time_encoding.get("dtype"), } else: - time_encoding = {"units": "days since 1850-1-1"} + time_encoding = {UNITS_ATTRIBUTE_KEY: "days since 1850-1-1"} result_ds.to_netcdf( file_path, - format=netcdf_version.value, + format=netcdf_version.name, encoding={"time": time_encoding}, ) def _handle_deprecated_params( - index_name, indice_name, transfer_limit_Mbytes, user_index, user_indice -) -> tuple[str, UserIndexDict]: + index_name, + user_index, + save_thresholds, + doy_window_width, + indice_name, + 
transfer_limit_Mbytes, + user_indice, + save_percentile, + window_width, +) -> tuple[str, UserIndexDict, bool, int]: if indice_name is not None: log.deprecation_warning(old="indice_name", new="index_name") index_name = indice_name @@ -337,10 +481,16 @@ def _handle_deprecated_params( user_index = user_indice if transfer_limit_Mbytes is not None: log.deprecation_warning(old="transfer_limit_Mbytes") - return index_name, user_index + if save_percentile is not None: + log.deprecation_warning(old="save_percentile", new="save_thresholds") + save_thresholds = save_percentile + if window_width is not None: + log.deprecation_warning(old="window_width", new="doy_window_width") + doy_window_width = window_width + return index_name, user_index, save_thresholds, doy_window_width -def _setup(callback, callback_start_value, logs_verbosity, slice_mode): +def _setup(callback, callback_start_value, logs_verbosity): # make xclim input daily check a warning instead of an error # TODO: it might be safer to feed a context manager which will setup # and teardown these confs @@ -352,37 +502,8 @@ def _setup(callback, callback_start_value, logs_verbosity, slice_mode): callback(callback_start_value) -def _compute_custom_climate_index( - config: IndexConfig, user_index: UserIndexDict -) -> Dataset: - logging.info("Calculating user index.") - result_ds = Dataset() - deprecated_name = user_index.get("indice_name", None) - if deprecated_name is not None: - user_index["index_name"] = deprecated_name - del user_index["indice_name"] - log.deprecation_warning("indice_name", "index_name") - user_indice_config = UserIndexConfig( - **user_index, - freq=config.frequency, - cf_vars=config.cf_variables, - is_percent=config.is_percent, - save_percentile=config.save_percentile, - ) - user_indice_da = compute_user_index(user_indice_config) - user_indice_da.attrs["units"] = _get_unit(config.out_unit, user_indice_da) - if user_indice_config.calc_operation is CalcOperation.ANOMALY: - # with anomaly time axis disappear - result_ds[user_indice_config.index_name] = user_indice_da - return result_ds - user_indice_da, time_bounds = config.frequency.post_processing(user_indice_da) - result_ds[user_indice_config.index_name] = user_indice_da - result_ds["time_bounds"] = time_bounds - return result_ds - - def _get_unit(output_unit: str | None, da: DataArray) -> str | None: - da_unit = da.attrs.get("units", None) + da_unit = da.attrs.get(UNITS_ATTRIBUTE_KEY, None) if da_unit is None: if output_unit is None: warn( @@ -397,83 +518,49 @@ def _get_unit(output_unit: str | None, da: DataArray) -> str | None: def _compute_standard_climate_index( - climate_index: ClimateIndex, + climate_index: GenericIndicator | None, config: IndexConfig, initial_history: str | None, initial_source: str, + reference: str, + rename: str | None = None, ) -> Dataset: - def compute(threshold: float | None = None): - conf = copy.copy(config) - if threshold is not None: - conf.threshold = threshold - if config.frequency.time_clipping is not None: - # xclim missing values checking system will not work with clipped time - with xclim.set_options(check_missing="skip"): - res = climate_index.compute(conf) - else: - res = climate_index.compute(conf) - if isinstance(res, tuple): - return res - else: - return (res, None) - - logging.info(f"Calculating climate index: {climate_index.short_name}") - result_ds = Dataset() - if config.threshold is not None: - thresh_key = ( - "percentiles" - if QUANTILE_BASED in climate_index.qualifiers - else "thresholds" - ) - if not 
isinstance(config.threshold, list): - thresholds = [config.threshold] - else: - thresholds = config.threshold - index_das = [] - per_das = [] - for th in thresholds: - index_da, per_da = compute(th) - index_da.coords[thresh_key] = th - index_das.append(index_da) - if per_da is not None: - per_das.append(per_da) - result_da = xr.concat(index_das, dim=thresh_key) - if len(per_das) > 0: - percentiles_da = xr.concat(per_das, dim=thresh_key) - else: - percentiles_da = None + result_da = climate_index(config) + if rename: + result_da = result_da.rename(rename) else: - result_da, percentiles_da = compute() - result_da.attrs["units"] = _get_unit(config.out_unit, result_da) - if config.frequency.post_processing is not None: + result_da = result_da.rename(climate_index.name) + result_da.attrs[UNITS_ATTRIBUTE_KEY] = _get_unit(config.out_unit, result_da) + if config.frequency.post_processing is not None and "time" in result_da.dims: resampled_da, time_bounds = config.frequency.post_processing(result_da) - result_ds[climate_index.short_name] = resampled_da + result_ds = resampled_da.to_dataset() if time_bounds is not None: result_ds["time_bounds"] = time_bounds result_ds.time.attrs["bounds"] = "time_bounds" else: - result_ds[climate_index.short_name] = result_da - if percentiles_da is not None: - result_ds = xr.merge([result_ds, percentiles_da]) + result_ds = result_da.to_dataset() + if config.save_thresholds: + result_ds = xr.merge( + [result_ds, _format_thresholds_for_export(config.climate_variables)] + ) history = _build_history(result_da, config, initial_history, climate_index) result_ds = _add_ecad_index_metadata( - result_ds, config, climate_index, history, initial_source + result_ds, climate_index, history, initial_source, reference ) return result_ds def _add_ecad_index_metadata( result_ds: Dataset, - config: IndexConfig, - computed_index: ClimateIndex, + computed_index: Indicator, history: str, initial_source: str, + reference: str, ) -> Dataset: result_ds.attrs.update( dict( - title=_build_title(computed_index, config), - references="ATBD of the ECA&D indices calculation" - " (https://knmi-ecad-assets-prd.s3.amazonaws.com/documents/atbd.pdf)", + title=computed_index.standard_name, + references=reference, institution="Climate impact portal (https://climate4impact.eu)", history=history, source=initial_source if initial_source is not None else "", @@ -485,45 +572,150 @@ def _add_ecad_index_metadata( return result_ds -def _build_title(computed_index: ClimateIndex, config: IndexConfig): - if config.threshold is not None: - return f"Index {computed_index.short_name} on threshold(s) {config.threshold}" - else: - return f"{computed_index.group.value} index {computed_index.short_name}" - - def _build_history( result_da: DataArray, config: IndexConfig, initial_history: str | None, - indice_computed: ClimateIndex, + indice_computed: Indicator, ) -> str: if initial_history is None: # get xclim history initial_history = result_da.attrs[HISTORY_CF_KEY] else: # append xclim history - initial_history = f"{initial_history}\n{result_da.attrs['history']}" + initial_history = f"{initial_history}\n{result_da.attrs[HISTORY_CF_KEY]}" del result_da.attrs[HISTORY_CF_KEY] current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - start_time = result_da.time[0].dt.strftime("%m-%d-%Y").data[()] - end_time = result_da.time[-1].dt.strftime("%m-%d-%Y").data[()] return ( f"{initial_history}\n" f" [{current_time}]" - f" Calculation of {indice_computed.short_name}" - f" index({config.frequency.description})" - f" from 
{start_time} to {end_time}" + f" Calculation of {indice_computed.name}" + f" index ({config.frequency.adjective})" f" - icclim version: {ICCLIM_VERSION}" ) -def _check_valid_config(index: ClimateIndex, config: IndexConfig): - if index in get_season_excluded_indices() and config.frequency.indexer is not None: +def build_threshold( + threshold: str | Threshold, + doy_window_width: int, + base_period_time_range: Sequence[datetime | str] | None, + only_leap_years: bool, + interpolation: QuantileInterpolation, +) -> Threshold: + if isinstance(threshold, Threshold): + return threshold + else: + return Threshold( + threshold, + doy_window_width=doy_window_width, + reference_period=base_period_time_range, + only_leap_years=only_leap_years, + interpolation=interpolation, + ) + + +def _format_thresholds_for_export(climate_vars: list[ClimateVariable]) -> Dataset: + return xr.merge([_format_threshold(v) for v in climate_vars]) + + +def _format_threshold(cf_var: ClimateVariable) -> DataArray: + return cf_var.threshold.value.rename(cf_var.name + "_thresholds").reindex() + + +# TODO: [refacto] Move these "read_*" functions to input_parsing or +# user_index_parsing + + +def read_indicator(user_index: UserIndexDict) -> GenericIndicator: + calc_op = CalcOperationRegistry.lookup(user_index["calc_operation"]) + mapping = { + CalcOperationRegistry.MAX: GenericIndicatorRegistry.lookup("Maximum"), + CalcOperationRegistry.MIN: GenericIndicatorRegistry.lookup("Minimum"), + CalcOperationRegistry.SUM: GenericIndicatorRegistry.lookup("Sum"), + CalcOperationRegistry.MEAN: GenericIndicatorRegistry.lookup("Average"), + CalcOperationRegistry.EVENT_COUNT: GenericIndicatorRegistry.lookup( + "CountOccurrences" + ), + CalcOperationRegistry.MAX_NUMBER_OF_CONSECUTIVE_EVENTS: GenericIndicatorRegistry.lookup( # noqa + "MaxConsecutiveOccurrence" + ), + CalcOperationRegistry.ANOMALY: GenericIndicatorRegistry.lookup( + "DifferenceOfMeans" + ), + } + if calc_op == CalcOperationRegistry.RUN_SUM: + if user_index["extreme_mode"] == "max": + indicator = GenericIndicatorRegistry.lookup("MaxOfRollingSum") + elif user_index["extreme_mode"] == "min": + indicator = GenericIndicatorRegistry.lookup("MinOfRollingSum") + else: + raise NotImplementedError() + elif calc_op == CalcOperationRegistry.RUN_MEAN: + if user_index["extreme_mode"] == "max": + indicator = GenericIndicatorRegistry.lookup("MaxOfRollingAverage") + elif user_index["extreme_mode"] == "min": + indicator = GenericIndicatorRegistry.lookup("MinOfRollingAverage") + else: + raise NotImplementedError() + else: + indicator = mapping.get(calc_op) + if indicator is None: + raise InvalidIcclimArgumentError( - "Indices computing a spell cannot be computed on un-clipped season for now." - " Instead, you can use a clipped_season like this:" - "`slice_mode=['clipped_season', [12,1,2]]` (example of a DJF season)." - " However, it will NOT take into account spells beginning before the season" - " start!"
+ f"Unknown user_index calc_operation:" f" '{user_index['calc_operation']}'" + ) + return indicator + + +def read_threshold( + user_index: UserIndexDict, build_threshold: Callable[[str | Threshold], Threshold] +) -> Threshold | None | Sequence[Threshold]: + thresh = user_index.get("thresh", None) + if ( + thresh is None + or isinstance(thresh, Threshold) + or ( + isinstance(thresh, (tuple, list)) + and all(map(lambda th: isinstance(th, Threshold), thresh)) ) + ): + return thresh + logical_operation = user_index["logical_operation"] + if not isinstance(logical_operation, (tuple, list)): + logical_operation = [logical_operation] + logical_operation = [OperatorRegistry.lookup(op) for op in logical_operation] + if not isinstance(thresh, (tuple, list)): + thresh = [thresh] + acc = [] + for i, t in enumerate(thresh): + if isinstance(t, str) and t.endswith(PERCENTILE_THRESHOLD_STAMP): + var_type = user_index.get("var_type", None) + if var_type == USER_INDEX_TEMPERATURE_STAMP: + replace_unit = "doy_per" + elif var_type == USER_INDEX_PRECIPITATION_STAMP: + replace_unit = "period_per" + else: + replace_unit = "period_per" # default to period percentiles ? + t = t.replace(PERCENTILE_THRESHOLD_STAMP, " " + replace_unit) + else: + t = str(t) + acc.append(build_threshold(str(logical_operation[i].operand + t))) + return acc + + +def read_logical_link(user_index: UserIndexDict) -> LogicalLink: + # todo add unit test using it + logical_link = user_index.get("link_logical_operations", None) + if logical_link is None: + return LogicalLinkRegistry.LOGICAL_AND + else: + return LogicalLinkRegistry.lookup(logical_link) + + +def read_coef(user_index: UserIndexDict) -> float | None: + # todo add unit test using it + return user_index.get("coef", None) + + +def read_date_event(user_index: UserIndexDict) -> float | None: + # todo add unit test using it + return user_index.get("date_event", False) diff --git a/icclim/models/__init__.py b/icclim/models/__init__.py index 111138c9..8bd7786c 100644 --- a/icclim/models/__init__.py +++ b/icclim/models/__init__.py @@ -1,4 +1,4 @@ """ -models package contains mostly class and enum declarations and should have as less as -possible logic in it. +models package contains mostly DTO classes. they should have as few as +possible logic in them. 
""" diff --git a/icclim/models/cf_calendar.py b/icclim/models/cf_calendar.py index ddcd8c48..5773a991 100644 --- a/icclim/models/cf_calendar.py +++ b/icclim/models/cf_calendar.py @@ -1,33 +1,30 @@ from __future__ import annotations -from enum import Enum +import dataclasses from typing import Callable import numpy as np import xarray as xr from xarray import DataArray +from icclim.models.registry import Registry -def _proleptic_gregorian_leap(years: DataArray) -> DataArray: - return np.logical_or( - years % 400 == 0, np.logical_and(years % 100 != 0, years % 4 == 0) - ) +@dataclasses.dataclass +class CfCalendar: + aliases: list[str] + is_leap: Callable[[DataArray], np.ndarray] -def _julian_leap(years: DataArray) -> DataArray: - return years % 4 == 0 - - -def _standard_leap(years: DataArray) -> DataArray: - res = xr.full_like(years, False) - res[years < 1582] = _julian_leap(years[years < 1582]) - res[years >= 1582] = _proleptic_gregorian_leap(years[years >= 1582]) - return res + @property + def name(self) -> str: + return self.aliases[0] -class CfCalendar(Enum): +# todo: the whole class might be useless with the latest cftime +# (we don't need our own CfCalendar if we can do `da.time.dt.is_leap_year`) +class CfCalendarRegistry(Registry): """ - CF defined calendars with some additional aliases names for convenience. + Calendars known in CF plus some additional custom aliases for convenience. The first value of the aliases is the calendar "main" name. aliases: List[str] @@ -39,37 +36,48 @@ class CfCalendar(Enum): """ - NO_LEAP = ( + _item_class = CfCalendar + + NO_LEAP = CfCalendar( ["noleap", "no_leap", "days_365", "days365", "365_day", "365day"], lambda da: np.full_like(da.shape, False, dtype=bool), ) - DAYS_360 = ( + DAYS_360 = CfCalendar( ["360_day", "days_360", "360day", "days360"], lambda da: np.full_like(da.shape, False, dtype=bool), ) - ALL_LEAP = ( + ALL_LEAP = CfCalendar( ["all_leap", "allleap", "days_366", "days366", "366_day", "366day"], lambda da: np.full_like(da.shape, True, dtype=bool), ) - PROLEPTIC_GREGORIAN = ( + PROLEPTIC_GREGORIAN = CfCalendar( ["proleptic_gregorian", "prolepticgregorian"], lambda da: _proleptic_gregorian_leap(da).values, ) - JULIAN = (["julian"], lambda da: _julian_leap(da).values) - STANDARD = (["standard", "gregorian"], lambda da: _standard_leap(da).values) + JULIAN = CfCalendar(["julian"], lambda da: _julian_leap(da).values) + STANDARD = CfCalendar( + ["standard", "gregorian"], lambda da: _standard_leap(da).values + ) # Not sure what to do with none calendar - NONE = (["none"], lambda da: _standard_leap(da).values) + NONE = CfCalendar(["none"], lambda da: _standard_leap(da).values) + + @staticmethod + def get_item_aliases(item: CfCalendar) -> list[str]: + return list(map(str.upper, item.aliases)) - def __init__(self, aliases: list[str], is_leap: Callable[[DataArray], np.ndarray]): - self.aliases = aliases - self.is_leap = is_leap - def get_name(self) -> str: - return self.aliases[0] +def _proleptic_gregorian_leap(years: DataArray) -> DataArray: + return np.logical_or( + years % 400 == 0, np.logical_and(years % 100 != 0, years % 4 == 0) + ) - @staticmethod - def lookup(query: str) -> CfCalendar: - res = list(filter(lambda x: query.lower() in x.aliases, CfCalendar)) - if len(res) == 0: - raise TypeError(f"No calendars found for query '{query}'") - return res[0] + +def _julian_leap(years: DataArray) -> DataArray: + return years % 4 == 0 + + +def _standard_leap(years: DataArray) -> DataArray: + res = xr.full_like(years, False) + res[years < 1582] = 
_julian_leap(years[years < 1582]) + res[years >= 1582] = _proleptic_gregorian_leap(years[years >= 1582]) + return res diff --git a/icclim/models/cf_variable.py b/icclim/models/cf_variable.py deleted file mode 100644 index 644f9156..00000000 --- a/icclim/models/cf_variable.py +++ /dev/null @@ -1,28 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass - -from xarray import DataArray -from xclim.core.utils import PercentileDataArray - - -@dataclass() -class CfVariable: - """CfVariable groups together two xarray DataArray for the same variable. - One represent the whole studied period. The other is only the in base period used by - percentile based indices to compute percentiles. - This is an internal icclim structure. - - Parameters - ---------- - name: str - Name of the variable. - study_da: DataArray - The variable studied. - reference_da: DataArray - The variable studied limited to the in base period. - """ - - name: str - study_da: DataArray - reference_da: DataArray | PercentileDataArray | None = None diff --git a/icclim/models/climate_index.py b/icclim/models/climate_index.py deleted file mode 100644 index 36378777..00000000 --- a/icclim/models/climate_index.py +++ /dev/null @@ -1,47 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from typing import Any, Callable, Optional, Tuple, Union - -from xarray import DataArray - -from icclim.models.index_group import IndexGroup - -ComputeIndexFun = Callable[ - [Any], Union[DataArray, Tuple[DataArray, Optional[DataArray]]] -] - - -@dataclass -class ClimateIndex: - """Climate index data class. - - Attributes - ---------- - - short_name: str - The index name used in the output. - compute: Callable - The function to compute the index. It usually wraps a xclim functions. - group: IndexGroup - The index group category. - variables: List[List[str]] - The Cf variables needed to compute the index. - The variable are individually described by a list of aliases. - qualifiers: List[str] | None - ``optional`` List of configuration to compute the index. - Used internally to generate modules for C3S. - source: str | None - Where the index definition comes from. - definition: str | None - A formal definition of the index. It should describe what kind of output - the user is expected to obtain. 
- """ - - short_name: str - compute: ComputeIndexFun - group: IndexGroup - input_variables: list[list[str]] - qualifiers: list[str] | None = None - source: str | None = None - definition: str | None = None diff --git a/icclim/models/climate_variable.py b/icclim/models/climate_variable.py new file mode 100644 index 00000000..77199ea6 --- /dev/null +++ b/icclim/models/climate_variable.py @@ -0,0 +1,305 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Callable, Sequence + +import xarray +from xarray.core.dataarray import DataArray + +from icclim.generic_indices.cf_var_metadata import StandardVariable +from icclim.icclim_exceptions import InvalidIcclimArgumentError +from icclim.icclim_types import InFileBaseType, InFileLike +from icclim.models.constants import UNITS_ATTRIBUTE_KEY +from icclim.models.frequency import Frequency, FrequencyRegistry +from icclim.models.global_metadata import GlobalMetadata +from icclim.models.standard_index import StandardIndex +from icclim.models.threshold import Threshold +from icclim.pre_processing.in_file_dictionary import InFileDictionary +from icclim.pre_processing.input_parsing import ( + DEFAULT_INPUT_FREQUENCY, + build_reference_da, + build_studied_data, + guess_input_type, + guess_var_names, + read_dataset, +) + +# TODO: [refacto] a model file/class should not have that much logic, +# move stuff to a ClimateVariableFactory or something similar + + +@dataclass +class ClimateVariable: + """Internal icclim structure. It groups together the input variable (studied_data), + its associated metadata (standard_var) and the threshold it must be compared to. + + Attributes + ---------- + name: str + Name of the variable. + standard_var: StandardVariable + CF metadata bounded to the standard variable used for this ClimateVariable. + studied_data: DataArray + The variable studied. + threshold: Threshold | None + thresholds for this variable + """ + + name: str + standard_var: StandardVariable | None + studied_data: DataArray + global_metadata: GlobalMetadata + source_frequency: Frequency + threshold: Threshold | None = None + is_reference: bool = False + + def build_indicator_metadata( + self, src_freq: Frequency, must_run_bootstrap: bool, indicator_name: str + ) -> dict[str, str] | None: + metadata = {"threshold": {}} + if self.standard_var is None: + metadata.update( + dict( + standard_name="unknown_variable", + long_name="unknown variable", + short_name="input", + ) + ) + else: + metadata.update(self.standard_var.get_metadata()) + if self.threshold is not None: + metadata.update( + { + "threshold": self.threshold.get_metadata( + src_freq, must_run_bootstrap, indicator_name + ) + } + ) + return metadata + + +def build_climate_vars( + climate_vars_dict: dict[str, InFileDictionary], + ignore_Feb29th: bool, + time_range: Sequence[str], + base_period: Sequence[str] | None, + standard_index: StandardIndex | None, + is_compared_to_reference: bool, +) -> list[ClimateVariable]: + if standard_index is not None and len(standard_index.input_variables) > len( + climate_vars_dict + ): + raise InvalidIcclimArgumentError( + f"Index {standard_index.short_name} needs" + f" {len(standard_index.input_variables)} variables." + f" Please provide them with an xarray.Dataset, netCDF file(s) or a" + f" zarr store." 
+        )
+    acc = []
+    for i, raw_climate_var in enumerate(climate_vars_dict.items()):
+        if standard_index is not None:
+            standard_var = standard_index.input_variables[i]
+        else:
+            standard_var = None
+        acc.append(
+            _build_climate_var(
+                raw_climate_var[0],
+                raw_climate_var[1],
+                ignore_Feb29th,
+                time_range,
+                standard_var=standard_var,
+            )
+        )
+    if is_compared_to_reference:
+        standard_var = (
+            standard_index.input_variables[0] if standard_index is not None else None
+        )
+        added_var = _build_reference_variable(
+            base_period,
+            climate_vars_dict,
+            standard_var=standard_var,
+        )
+        acc.append(added_var)
+    return acc
+
+
+def _build_reference_variable(
+    reference_period: Sequence[str] | None,
+    in_files,
+    standard_var: StandardVariable,
+) -> ClimateVariable:
+    """This function adds a secondary variable for indices, such as anomaly, that
+    need exactly two variables but where the second variable can simply be a subset
+    of the first one.
+    """
+    if reference_period is None:
+        raise InvalidIcclimArgumentError(
+            "Can't build a reference variable without a `base_period_time_range`"
+        )
+    var_name = list(in_files.keys())[0]
+    if isinstance(in_files, dict):
+        study_ds = read_dataset(
+            list(in_files.values())[0]["study"],
+            standard_var=standard_var,
+            var_name=var_name,
+        )
+    else:
+        study_ds = read_dataset(
+            list(in_files.values())[0], standard_var=standard_var, var_name=var_name
+        )
+    studied_data = build_reference_da(
+        study_ds[var_name],
+        reference_period,
+        only_leap_years=False,
+        percentile_min_value=None,
+    )
+    return ClimateVariable(
+        name=var_name + "_reference",
+        standard_var=standard_var,
+        studied_data=studied_data,
+        threshold=None,
+        global_metadata={
+            "history": study_ds.attrs.get("history", None),
+            "source": study_ds.attrs.get("source", None),
+            "time_encoding": study_ds.time.encoding,
+        },
+        source_frequency=FrequencyRegistry.lookup(
+            xarray.infer_freq(studied_data.time) or DEFAULT_INPUT_FREQUENCY
+        ),
+        is_reference=True,
+    )
+
+
+def must_add_reference_var(
+    climate_vars_dict: dict[str, InFileDictionary],
+    reference_period: Sequence[str] | None,
+) -> bool:
+    """True whenever the input has no threshold and only one studied variable, but
+    there is a reference period.
+    Example case: the anomaly of tx(1960-2100) by tx(1960-1990).
+    """
+    t = list(climate_vars_dict.values())[0].get("thresholds", None)
+    return t is None and len(climate_vars_dict) == 1 and reference_period is not None
+
+
+def to_dictionary(
+    in_files: InFileLike,
+    var_names: Sequence[str] | None,
+    threshold: Threshold | Sequence[Threshold] | None,
+    standard_index: StandardIndex | None,
+) -> dict[str, InFileDictionary]:
+    if isinstance(in_files, dict):
+        if var_names is not None:
+            raise InvalidIcclimArgumentError(
+                "`var_name` must be None when `in_files` is a dictionary."
+                " The dictionary keys are used in place of `var_name`."
+            )
+        if isinstance(list(in_files.values())[0], dict):
+            # case of in_files={tasmax: {"study": "tasmax.nc"}}
+            return in_files
+        else:
+            # case of in_files={tasmax: "tasmax.nc"}
+            return _build_in_file_dict(
+                in_files=list(in_files.values()),
+                standard_index=standard_index,
+                threshold=threshold,
+                var_names=list(in_files.keys()),
+            )
+    else:
+        # case of in_files="tasmax.nc" and var_names="tasmax"
+        return _build_in_file_dict(in_files, var_names, threshold, standard_index)
+
+
+def _build_in_file_dict(
+    in_files: InFileBaseType,
+    var_names: Sequence[str],
+    threshold: Threshold | Sequence[Threshold] | None,
+    standard_index: StandardIndex | None,
+):
+    standard_var = (
+        standard_index.input_variables[0] if standard_index is not None else None
+    )
+    input_dataset = read_dataset(
+        in_files=in_files, standard_var=standard_var, var_name=var_names
+    )
+    var_names = guess_var_names(
+        ds=input_dataset, standard_index=standard_index, var_names=var_names
+    )
+    if threshold is not None:
+        if not isinstance(threshold, Sequence):
+            threshold = [threshold]
+        if len(threshold) != len(var_names):
+            raise InvalidIcclimArgumentError(
+                "There must be as many thresholds as there are variables. There were"
+                f" {len(threshold)} thresholds and {len(var_names)} variables."
+            )
+        return {
+            var_name: {"study": input_dataset[var_name], "thresholds": threshold[i]}
+            for i, var_name in enumerate(var_names)
+        }
+    else:
+        return {var_name: {"study": input_dataset[var_name]} for var_name in var_names}
+
+
+def _build_climate_var(
+    climate_var_name: str,
+    climate_var_data: InFileDictionary | InFileBaseType,
+    ignore_Feb29th: bool,
+    time_range: Sequence[str],
+    standard_var: StandardVariable | None,
+) -> ClimateVariable:
+    if isinstance(climate_var_data, dict):
+        study_ds = read_dataset(
+            climate_var_data["study"], standard_var, climate_var_name
+        )
+        # todo: deprecate climate_var_data.get("per_var_name", None)
+        #       in favor of threshold_var_name
+        climate_var_thresh = climate_var_data.get("thresholds", None)
+    else:
+        climate_var_data: InFileBaseType
+        study_ds = read_dataset(climate_var_data, standard_var, climate_var_name)
+        climate_var_thresh = None
+    if standard_var is None:
+        standard_var = guess_input_type(study_ds[climate_var_name])
+    studied_data = build_studied_data(
+        study_ds[climate_var_name],
+        time_range,
+        ignore_Feb29th,
+        standard_var,
+    )
+    if climate_var_thresh is not None:
+        climate_var_thresh = _build_threshold(
+            climate_var_thresh=climate_var_thresh,
+            original_data=study_ds[climate_var_name],
+            conversion_unit=studied_data.attrs[UNITS_ATTRIBUTE_KEY],
+        )
+    return ClimateVariable(
+        name=climate_var_name,
+        standard_var=standard_var,
+        studied_data=studied_data,
+        threshold=climate_var_thresh,
+        global_metadata={
+            "history": study_ds.attrs.get("history", None),
+            "source": study_ds.attrs.get("source", None),
+            "time_encoding": study_ds.time.encoding,
+        },
+        source_frequency=FrequencyRegistry.lookup(
+            xarray.infer_freq(studied_data.time) or DEFAULT_INPUT_FREQUENCY
+        ),
+    )
+
+
+def _build_threshold(
+    climate_var_thresh: str | Threshold,
+    original_data: DataArray,
+    conversion_unit: str,
+) -> Threshold:
+    if isinstance(climate_var_thresh, str):
+        climate_var_thresh: Threshold = Threshold(climate_var_thresh)
+    if isinstance(climate_var_thresh.value, Callable):
+        climate_var_thresh.value = climate_var_thresh.value(
+            studied_data=original_data,
+        )
+    climate_var_thresh.unit = conversion_unit
+    climate_var_thresh.value = climate_var_thresh.value.chunk("auto")
+    return climate_var_thresh
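
For reference, the three equivalent ``in_files`` shapes that ``to_dictionary`` above normalizes (editor's sketch, not part of the patch; the file name is a placeholder):

    # All three describe the same single-variable setup and each yields a
    # {"tasmax": {"study": ...}} dictionary, later consumed by _build_climate_var:
    to_dictionary("tasmax.nc", var_names=["tasmax"], threshold=None, standard_index=None)
    to_dictionary({"tasmax": "tasmax.nc"}, var_names=None, threshold=None, standard_index=None)
    to_dictionary({"tasmax": {"study": "tasmax.nc"}}, var_names=None, threshold=None, standard_index=None)
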
diff --git a/icclim/models/constants.py b/icclim/models/constants.py
index 02e23f32..ce618cc7 100644
--- a/icclim/models/constants.py
+++ b/icclim/models/constants.py
@@ -5,23 +5,21 @@
 ICCLIM_VERSION = "5.4.0"

-# placeholder for user_index
+# placeholders for user_index
 PERCENTILE_THRESHOLD_STAMP = "p"
 WET_DAY_THRESHOLD = 1  # 1mm
-PRECIPITATION = "p"
-TEMPERATURE = "t"
+USER_INDEX_PRECIPITATION_STAMP = "p"
+USER_INDEX_TEMPERATURE_STAMP = "t"

 # percentiles dimension from percentile_doy
 PERCENTILES_COORD = "percentiles"
 # attribute holding the in_base time bounds
-IN_BASE_IDENTIFIER = "reference_epoch"
+REFERENCE_PERIOD_ID = "reference_epoch"
+# coordinate of day of year values (usually from 1 to 365/366)
+DOY_COORDINATE = "dayofyear"
+# Units attribute key for DataArray(s)
+UNITS_ATTRIBUTE_KEY = "units"

-# Aliases of input variables names.
-# Source: clix-meta (modified)
-PR = ["pr", "pradjust", "prAdjust", "prec", "rr", "precip", "PREC", "Prec", "RR", "PRECIP", "Precip"]
-TAS = ["tas", "tavg", "ta", "tasadjust", "tasAdjust", "tmean", "tm", "tg", "meant", "TMEAN", "Tmean", "TM", "TG", "MEANT", "meanT", "tasmidpoint"]
-TAS_MAX = ["tasmax", "tasmaxadjust", "tasmaxAdjust", "tmax", "tx", "maxt", "TMAX", "Tmax", "TX", "MAXT", "maxT"]
-TAS_MIN = ["tasmin", "tasminadjust", "tasminAdjust", "tmin", "tn", "mint", "TMIN", "Tmin", "TN", "MINT", "minT"]

 # Aliases of input percentiles variables names
 # Source icclim dev
@@ -33,16 +31,51 @@
 # Index qualifiers (needed to generate the API)
 QUANTILE_BASED = "QUANTILE_BASED"  # fields: QUANTILE_INDEX_FIELDS
 MODIFIABLE_UNIT = "MODIFIABLE_UNIT"  # fields: out_unit
-MODIFIABLE_THRESHOLD = "MODIFIABLE_THRESHOLD"  # fields: threshold
-MODIFIABLE_QUANTILE_WINDOW = "MODIFIABLE_QUANTILE_WINDOW"  # fields: window_width

 # Map of months index to their short name, used to get a pandas frequency anchor
 MONTHS_MAP = {1: "JAN", 2: "FEB", 3: "MAR", 4: "APR", 5: "MAY", 6: "JUN", 7: "JUL", 8: "AUG", 9: "SEP", 10: "OCT", 11: "NOV", 12: "DEC"}

 # Seasons defined by their month numbers
-AMJJAS_MONTHS = [*range(4, 10)]
-ONDJFM_MONTHS = [10, 11, 12, 1, 2, 3]
-DJF_MONTHS = [12, 1, 2]
-MAM_MONTHS = [*range(3, 6)]
-JJA_MONTHS = [*range(6, 9)]
-SON_MONTHS = [*range(9, 12)]
+AMJJAS_MONTHS: list[int] = [*range(4, 10)]
+ONDJFM_MONTHS: list[int] = [10, 11, 12, 1, 2, 3]
+DJF_MONTHS: list[int] = [12, 1, 2]
+MAM_MONTHS: list[int] = [*range(3, 6)]
+JJA_MONTHS: list[int] = [*range(6, 9)]
+SON_MONTHS: list[int] = [*range(9, 12)]
+
+# pseudo units used with Threshold class (not in Pint)
+PERIOD_PERCENTILE_UNIT = "period_per"
+DOY_PERCENTILE_UNIT = "doy_per"
+
+# Mapping of frequencies to generate metadata
+# copied from xclim and updated.
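# (editor's note, not part of the patch: the intended lookups are e.g.
#  EN_FREQ_MAPPING["YS"] -> "year(s)" and EN_FREQ_MAPPING["DEC"] -> "December starting",
#  so that an anchored pandas frequency such as "AS-DEC" can be rendered as
#  "December starting year(s)" by the _get_long_name helper later in this diff.)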
+EN_FREQ_MAPPING = { + "YS": "year(s)", "Y": "year(s)", "AS": "year(s)", "A": "year(s)", + "QS": "season(s)", "Q": "season(s)", + "MS": "month(s)", "M": "month(s)", + "W": "week(s)", + "D": "day(s)", + "H": "hour(s)", + "JAN": "January starting", "FEB": "February starting", "MAR": "March starting", "APR": "April starting", "MAY": "May starting", "JUN": "June starting", "JUL": "July starting", "AUG": "August starting", "SEP": "September starting", "OCT": "October starting", "NOV": "November starting", "DEC": "December starting", + # Arguments to "indexer" + "DJF": "wintry", "MAM": "springlong", "JJA": "summery", "SON": "autumnal", + "norm": "Normal", + "m1": "january", "m2": "february", "m3": "march", "m4": "april", "m5": "may", "m6": "june", "m7": "july", "m8": "august", "m9": "september", "m10": "october", "m11": "november", "m12": "december", + "MON": "monday starting", "TUE": "tuesday starting", "WED": "wednesday starting", "THU": "thursday starting", "FRI": "friday starting", "SAT": "saturday starting", "SUN": "sunday starting" +} +FREQ_DELTA_MAPPING = { + "YS": (1, "Y" ), "Y": (1, "Y" ), "AS": (1, "Y" ), "A": (1, "Y" ), + "QS": (3, "M" ), "Q": (3, "M" ), + "MS": (1, "M" ), "M": (1, "M" ), + "W": (7, "D" ), + "D": (1, "D" ), + "H": (1, "H" ), +} + +# Special CF unit +PART_OF_A_WHOLE_UNIT = "1" + +# companion parameter to slice_mode +GROUP_BY_METHOD = "groupby" +RESAMPLE_METHOD = "resample" +GROUP_BY_REF_AND_RESAMPLE_STUDY_METHOD = "groupby_ref_and_resample_study" diff --git a/icclim/models/frequency.py b/icclim/models/frequency.py index 11ace209..0b3791b9 100644 --- a/icclim/models/frequency.py +++ b/icclim/models/frequency.py @@ -1,58 +1,47 @@ """ `icclim.models.frequency` wraps the concept of pandas frequency in order to resample - time series. `slice_mode` parameter of `icclim.index` is always converted to a + time series. `slice_mode` parameter of `icclim.index` is always converted to a `Frequency`. """ from __future__ import annotations +import dataclasses +import re from datetime import timedelta -from enum import Enum -from typing import Any, Callable, Dict, List, Literal, Tuple, Union +from typing import Any, Callable, Sequence import cftime import numpy as np import pandas as pd import xarray as xr -import xclim.core.calendar from pandas.tseries.frequencies import to_offset from xarray.core.dataarray import DataArray from icclim.icclim_exceptions import InvalidIcclimArgumentError +from icclim.icclim_types import FrequencyLike, Indexer from icclim.models.constants import ( AMJJAS_MONTHS, DJF_MONTHS, + EN_FREQ_MAPPING, + FREQ_DELTA_MAPPING, JJA_MONTHS, MAM_MONTHS, MONTHS_MAP, ONDJFM_MONTHS, SON_MONTHS, ) +from icclim.models.registry import Registry from icclim.utils import read_date SEASON_ERR_MSG = ( "A season created using `slice_mode` must be made of either" - " consecutive integer for months such as [1,2,3] or two string for" - " dates such as ['19 july', '14 august']." + " consecutive integers for months such as [1,2,3] or two date strings" + " such as ['19 july', '14 august']." 
) - -def _get_end_date( - use_cftime: bool, year: int, month: int, day: int = None, calendar=None -): - delta = timedelta(days=0) - if day is None: - if month == 12: - day = 31 - else: - # get the next month and subtract a day (handle any month and leap years) - month = month + 1 - day = 1 - delta = timedelta(days=1) - if use_cftime: - end = cftime.datetime(year, month, day, calendar=calendar) - else: - end = pd.to_datetime(f"{year}-{month}-{day}") - return end - delta +# RUN_INDEXER is a special value used for group by when there is no proper groupby to do +# but instead a filtering should be applied before the reducer. +RUN_INDEXER = "run_indexer" def get_seasonal_time_updater( @@ -119,7 +108,7 @@ def add_time_bounds(da: DataArray) -> tuple[DataArray, DataArray]: return add_time_bounds -def _get_time_bounds_updater( +def get_time_bounds_updater( freq: str, ) -> Callable[[DataArray], tuple[DataArray, DataArray]]: def add_time_bounds(da: DataArray) -> tuple[DataArray, DataArray]: @@ -159,208 +148,233 @@ def add_time_bounds(da: DataArray) -> tuple[DataArray, DataArray]: return add_time_bounds -class _Freq: - """Internal class to ease writing and maintaining the enum. - Without it, in the instanciation of enum values we would have to write tuples - would not be able to use kwargs, which make the code less readable. - """ +@dataclasses.dataclass +class Frequency: + """Time sampling frequency.""" - def __init__( - self, - pandas_freq: str, - accepted_values: list[str], - description: str, - post_processing: Callable[[DataArray], tuple[DataArray, DataArray]], - indexer: Indexer | None, - time_clipping: Callable = None, - ): - self.pandas_freq: str = pandas_freq - self.accepted_values: list[str] = accepted_values - self.description = description - self.post_processing = post_processing - self.indexer = indexer - # time_clipping is a workaround for a "missing" feature of xclim. - # It allow to compute seasons for indices computing spells by ignoring values - # outside the season bounds. 
- self.time_clipping = time_clipping - - -class Frequency(Enum): - """The sampling frequency of the resulting dataset.""" - - MONTH = _Freq( + pandas_freq: str + accepted_values: list[str] + adjective: str + post_processing: Callable[[DataArray], tuple[DataArray, DataArray]] | None + units: str + indexer: Indexer | None + long_name: str + group_by_key: str | None + delta: timedelta | np.timedelta64 + + def build_frequency_kwargs(self) -> dict[str, Any]: + """Build kwargs with possible keys in {"freq", "month", "date_bounds"}""" + kwargs = dict(freq=self.pandas_freq) + if self.indexer is not None: + kwargs.update(self.indexer) + return kwargs + + +class FrequencyRegistry(Registry): + _item_class = Frequency + + HOUR = Frequency( + pandas_freq="H", + accepted_values=["hour", "h", "hourly"], + adjective="hourly", + indexer=None, + post_processing=get_time_bounds_updater("H"), + units="hours", + long_name="hour", + group_by_key="time.hour", + delta=np.timedelta64(1, "h"), + ) + """Resample to hourly values""" + + DAY = Frequency( + pandas_freq="D", + accepted_values=["daily", "day", "days", "d"], + adjective="daily", + indexer=None, + post_processing=get_time_bounds_updater("D"), + units="days", + long_name="day", + group_by_key="time.dayofyear", + delta=np.timedelta64(1, "D"), + ) + """Resample to daily values""" + + MONTH = Frequency( pandas_freq="MS", - accepted_values=["month", "MS"], - description="monthly time series", + accepted_values=["month", "monthly", "MS"], + adjective="monthly", indexer=None, - post_processing=_get_time_bounds_updater("MS"), + post_processing=get_time_bounds_updater("MS"), + units="months", + long_name="month", + group_by_key="time.month", + delta=np.timedelta64(1, "M"), ) """Resample to monthly values""" - YEAR = _Freq( + YEAR = Frequency( pandas_freq="YS", - accepted_values=["year", "YS"], - description="annual time series", + accepted_values=["year", "yearly", "annual", "YS"], + adjective="annual", indexer=None, - post_processing=_get_time_bounds_updater("YS"), + post_processing=get_time_bounds_updater("YS"), + units="years", + long_name="year", + group_by_key="time.year", + delta=np.timedelta64(1, "Y"), ) """Resample to yearly values.""" - AMJJAS = _Freq( + AMJJAS = Frequency( pandas_freq="AS-APR", accepted_values=["AMJJAS"], - description="summer half-year time series", + adjective="AMJJAS summery", indexer=dict(month=AMJJAS_MONTHS), post_processing=get_seasonal_time_updater(AMJJAS_MONTHS[0], AMJJAS_MONTHS[-1]), + units="half_year_summers", + long_name="AMJJAS season", + group_by_key=RUN_INDEXER, + delta=np.timedelta64(6, "M"), ) """Resample to summer half-year, from April to September included.""" - ONDJFM = _Freq( + ONDJFM = Frequency( pandas_freq="AS-OCT", accepted_values=["ONDJFM"], - description="winter half-year time series", + adjective="ONDJFM wintry", indexer=dict(month=ONDJFM_MONTHS), post_processing=get_seasonal_time_updater(ONDJFM_MONTHS[0], ONDJFM_MONTHS[-1]), + units="half_year_winters", + long_name="ONDJFM season", + group_by_key=RUN_INDEXER, + delta=np.timedelta64(6, "M"), ) """Resample to winter half-year, from October to March included.""" - DJF = _Freq( + DJF = Frequency( pandas_freq="AS-DEC", accepted_values=["DJF"], - description="winter time series", + adjective="DJF wintry", indexer=dict(month=DJF_MONTHS), post_processing=get_seasonal_time_updater(DJF_MONTHS[0], DJF_MONTHS[-1]), + units="winters", + long_name="DJF winter", + group_by_key=RUN_INDEXER, + delta=np.timedelta64(3, "M"), ) """Resample to winter season, from December to 
February included.""" - MAM = _Freq( + MAM = Frequency( pandas_freq="AS-MAR", accepted_values=["MAM"], - description="spring time series", + adjective="MAM springlong", indexer=dict(month=MAM_MONTHS), post_processing=get_seasonal_time_updater(MAM_MONTHS[0], MAM_MONTHS[-1]), + units="springs", + long_name="MAM season", + group_by_key=RUN_INDEXER, + delta=np.timedelta64(3, "M"), ) """Resample to spring season, from March to May included.""" - JJA = _Freq( + JJA = Frequency( pandas_freq="AS-JUN", accepted_values=["JJA"], - description="summer time series", + adjective="JJA summery", indexer=dict(month=JJA_MONTHS), post_processing=get_seasonal_time_updater(JJA_MONTHS[0], JJA_MONTHS[-1]), + units="summers", + long_name="JJA season", + group_by_key=RUN_INDEXER, + delta=np.timedelta64(3, "M"), ) """Resample to summer season, from June to Agust included.""" - SON = _Freq( + SON = Frequency( pandas_freq="AS-SEP", accepted_values=["SON"], - description="autumn time series", + adjective="SON autumnal", indexer=dict(month=SON_MONTHS), post_processing=get_seasonal_time_updater(SON_MONTHS[0], SON_MONTHS[-1]), + units="autumns", + long_name="SON season", + group_by_key=RUN_INDEXER, + delta=np.timedelta64(3, "M"), ) """Resample to fall season, from September to November included.""" - CUSTOM = _Freq( - pandas_freq="MS", - accepted_values=[], - description="", - indexer=None, - post_processing=lambda x: x, - ) - """Placeholder instance for custom sampling frequencies. - Do not use as is, use `slice_mode` with "month", "season" keywords instead. - """ - - def __init__(self, freq: _Freq): - self._freq = freq - - @property - def pandas_freq(self): - return self._freq.pandas_freq - - @property - def accepted_values(self): - return self._freq.accepted_values - - @property - def description(self): - return self._freq.description - - @property - def post_processing(self): - return self._freq.post_processing - - @property - def indexer(self): - return self._freq.indexer - - @property - def time_clipping(self): - return self._freq.time_clipping - - @staticmethod - def lookup(slice_mode: SliceMode) -> Frequency: - if isinstance(slice_mode, Frequency): - return slice_mode - if isinstance(slice_mode, str): - return _get_frequency_from_string(slice_mode) - if isinstance(slice_mode, list) or isinstance(slice_mode, tuple): - return _get_frequency_from_iterable(slice_mode) + @classmethod + def lookup(cls, item: FrequencyLike, no_error: bool = False) -> Frequency | None: + if isinstance(item, Frequency): + return item + if isinstance(item, str): + return _get_frequency_from_string(item) + if isinstance(item, (list, tuple)): + return _get_frequency_from_iterable(item) + if no_error: + return None raise InvalidIcclimArgumentError( - f"Unknown frequency {slice_mode}." - f"Use a Frequency from {[f for f in Frequency]}" + f"Unknown frequency {item}." 
+ f" Use a Frequency from {[f for f in FrequencyRegistry.all_aliases()]}" ) @staticmethod - def is_seasonal(slice_mode: SliceMode) -> bool: - return Frequency.lookup(slice_mode) in [ - Frequency.CUSTOM, - Frequency.ONDJFM, - Frequency.AMJJAS, - Frequency.MAM, - Frequency.JJA, - Frequency.SON, - Frequency.DJF, - ] + def get_item_aliases(item: Frequency) -> list[str]: + return item.accepted_values - def build_frequency_kwargs(self) -> dict[str, Any]: - """Build kwargs with possible keys in {"freq", "month", "date_bounds"}""" - kwargs = dict(freq=self._freq.pandas_freq) - if self._freq.indexer is not None: - kwargs.update(self._freq.indexer) - return kwargs + +def _get_end_date( + use_cftime: bool, year: int, month: int, day: int = None, calendar=None +): + delta = timedelta(days=0) + if day is None: + if month == 12: + day = 31 + else: + # get the next month and subtract a day (handle any month and leap years) + month = month + 1 + day = 1 + delta = timedelta(days=1) + if use_cftime: + end = cftime.datetime(year, month, day, calendar=calendar) + else: + end = pd.to_datetime(f"{year}-{month}-{day}") + return end - delta -def _get_frequency_from_string(slice_mode: str) -> Frequency: - for freq in Frequency: - if freq.name == slice_mode.upper() or slice_mode.upper() in map( +def _get_frequency_from_string(query: str) -> Frequency: + for key, freq in FrequencyRegistry.catalog().items(): + if key == query.upper() or query.upper() in map( str.upper, freq.accepted_values ): return freq - # else assumes it's a pandas frequency (such as W or 3MS) + # else assumes it's a pandas frequency (such as "W" or "3MS") try: - to_offset(slice_mode) # no-op, used to check if it's a valid pandas freq + to_offset(query) # no-op, used to check if it's a valid pandas freq except ValueError as e: raise InvalidIcclimArgumentError( - f"Unknown frequency {slice_mode}. Use either a" + f"Unknown frequency {query}. Use either a" " valid icclim frequency or a valid pandas" " frequency", e, ) - Frequency.CUSTOM._freq = _Freq( - post_processing=_get_time_bounds_updater(slice_mode), - pandas_freq=slice_mode, - description=f"time series sampled on {slice_mode}", + return Frequency( + post_processing=get_time_bounds_updater(query), + pandas_freq=query, + adjective=f"time series sampled on {query}", accepted_values=[], indexer=None, + units=query, + long_name=_get_long_name(query), + group_by_key=None, + delta=_get_delta(query), ) - return Frequency.CUSTOM def _is_season_valid(months: list[int]) -> bool: is_valid = True for i in range(0, len(months) - 1): - is_valid = is_valid and months[i] > 0 and months[i] < 13 + is_valid = is_valid and 0 < months[i] < 13 if months[i] > months[i + 1]: is_valid = is_valid and months[i + 1] == 1 and months[i] == 12 else: @@ -369,7 +383,7 @@ def _is_season_valid(months: list[int]) -> bool: def _get_frequency_from_iterable( - slice_mode_list: list | tuple[str, list | tuple] + slice_mode_list: list | tuple[str, Sequence] ) -> Frequency: if len(slice_mode_list) < 2: raise InvalidIcclimArgumentError( @@ -378,104 +392,119 @@ def _get_frequency_from_iterable( " its second a list (e.g `slice_mode=['season', [1,2,3]]` )." 
) freq_keyword = slice_mode_list[0] - custom_freq = Frequency.CUSTOM if freq_keyword in ["month", "months"]: - custom_freq._freq = _build_frequency_filtered_by_month(slice_mode_list[1]) - elif freq_keyword == "season": + return _build_frequency_filtered_by_month(slice_mode_list[1]) + elif freq_keyword in ["season", "seasons"]: season = slice_mode_list[1] - custom_freq._freq = _build_seasonal_freq(season, False) - elif freq_keyword == "clipped_season": - season = slice_mode_list[1] - custom_freq._freq = _build_seasonal_freq(season, True) + return _build_seasonal_freq(season) else: raise InvalidIcclimArgumentError( f"Unknown frequency {slice_mode_list}." " The sampling frequency must be one of {'season', 'month'}" ) - return custom_freq -def _build_frequency_filtered_by_month(months: list[int]) -> _Freq: - return _Freq( +def _build_frequency_filtered_by_month(months: Sequence[int]) -> Frequency: + return Frequency( indexer=dict(month=months), - post_processing=_get_time_bounds_updater("MS"), + post_processing=get_time_bounds_updater("MS"), pandas_freq="MS", - description=f"monthly time series (months: {months})", + adjective="monthly", accepted_values=[], + units="months", + long_name=f"monthly time series (months: {months})", + group_by_key="time.month", + delta=np.timedelta64(1, "M"), ) -def _build_seasonal_freq(season: tuple | list, clipped: bool): +def _build_seasonal_freq(season: Sequence): if isinstance(season[0], str): - return _build_seasonal_frequency_between_dates(season, clipped) - elif isinstance(season, Tuple) or isinstance(season[0], int): - return _build_seasonal_frequency_for_months(season, clipped) + return _build_seasonal_frequency_between_dates(season) + elif isinstance(season, tuple) or isinstance(season[0], int): + return _build_seasonal_frequency_for_months(season) + else: + raise NotImplementedError() -def _build_seasonal_frequency_between_dates(season: list[str], clipped: bool) -> _Freq: +def _build_seasonal_frequency_between_dates(season: Sequence[str]) -> Frequency: if len(season) != 2: raise InvalidIcclimArgumentError(SEASON_ERR_MSG) begin_date = read_date(season[0]) end_date = read_date(season[1]) begin_formatted = begin_date.strftime("%m-%d") end_formatted = end_date.strftime("%m-%d") - if clipped: - indexer = None - time_clipping = _get_filter_between_dates(begin_formatted, end_formatted) - else: - indexer = dict(date_bounds=(begin_formatted, end_formatted)) - time_clipping = None - return _Freq( + indexer = dict(date_bounds=(begin_formatted, end_formatted)) + return Frequency( indexer=indexer, post_processing=get_seasonal_time_updater( begin_date.month, end_date.month, begin_date.day, end_date.day ), pandas_freq=f"AS-{MONTHS_MAP[begin_date.month]}", - description=f"seasonal time series" - f" (season: from {begin_formatted} to {end_formatted})", + adjective="seasonally", accepted_values=[], - time_clipping=time_clipping, + units=f"{MONTHS_MAP[begin_date.month]}_{MONTHS_MAP[end_date.month]}_seasons", + long_name=f"seasonal time series" + f" (season: from {begin_formatted} to {end_formatted})", + group_by_key=RUN_INDEXER, + delta=np.timedelta64(end_date - begin_date), ) -def _build_seasonal_frequency_for_months(season: tuple | list, clipped: bool): +def _build_seasonal_frequency_for_months(season: tuple | list): if isinstance(season, tuple): # concat in case of ([12], [1, 2]) season = season[0] + season[1] if not _is_season_valid(season): raise InvalidIcclimArgumentError(SEASON_ERR_MSG) - if clipped: - indexer = None - time_clipping = _get_month_filter(season) - 
else: - indexer = dict(month=season) - time_clipping = None - return _Freq( + indexer = dict(month=season) + return Frequency( indexer=indexer, - time_clipping=time_clipping, post_processing=get_seasonal_time_updater(season[0], season[-1]), pandas_freq=f"AS-{MONTHS_MAP[season[0]]}", - description=f"seasonal time series (season: {season})", + adjective="seasonally", accepted_values=[], + units=f"{MONTHS_MAP[season[0]]}_{MONTHS_MAP[season[-1]]}_seasons", + long_name=f"seasonal time series (season: {season})", + group_by_key=RUN_INDEXER, + delta=np.timedelta64(len(season), "M"), ) -def _get_month_filter(season): - return lambda da: xclim.core.calendar.select_time(da, month=season) +def _get_long_name(pandas_freq: str) -> str: + no_digit_freq = re.findall(r"\D+", pandas_freq)[0] + multiplier = re.findall(r"\d+", pandas_freq) + freqs = no_digit_freq.split("-")[::-1] # reverse + freqs = [EN_FREQ_MAPPING[f] for f in freqs] + freqs = " ".join(freqs) + if multiplier: + return multiplier[0] + freqs + else: + return freqs -def _get_filter_between_dates(begin_date: str, end_date: str): - return lambda da: xclim.core.calendar.select_time( - da, date_bounds=(begin_date, end_date) - ) +def _get_delta(pandas_freq: str) -> np.timedelta64: + """ + Build timedelta from a "pandas frequency" string. + A "pandas frequency" string may look like ["2AS-DEC", "3W-TUE", "M", ... ] + The anchor, such as "DEC" in "AS-DEC", does not modify the delta. + Parameters + ---------- + pandas_freq : str + The frequency query. -SliceMode = Union[ - Frequency, str, List[Union[str, Tuple, int]], Tuple[str, Union[List, Tuple]] -] -MonthsIndexer = Dict[Literal["month"], List[int]] # format [12,1,2,3] -DatesIndexer = Dict[ - Literal["date_bounds"], Tuple[str, str] -] # format ("01-25", "02-28") -ClippedSeasonIndexer = Callable -Indexer = Union[MonthsIndexer, DatesIndexer, ClippedSeasonIndexer] + Returns + ------- + The timedelta corresponding to this frequency. + For example, "2AS-DEC" would return a 2 years delta. + """ + # [0] to ignore the anchor + non_digit = re.findall(r"\D+", pandas_freq)[0].split("-")[0] + base, freq = FREQ_DELTA_MAPPING[non_digit] + # we assume the starting digits are the multiplier. 
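# (editor's sketch, not part of the patch: given FREQ_DELTA_MAPPING above,
#  _get_delta("2AS-DEC") is expected to return np.timedelta64(2, "Y"), the "DEC"
#  anchor being ignored, and _get_delta("W") to return np.timedelta64(7, "D").)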
+    multiplier = re.findall(r"\d+", pandas_freq)
+    if multiplier:
+        return np.timedelta64(base * int(multiplier[0]), freq)
+    else:
+        return np.timedelta64(base, freq)
diff --git a/icclim/models/global_metadata.py b/icclim/models/global_metadata.py
new file mode 100644
index 00000000..2f32bc70
--- /dev/null
+++ b/icclim/models/global_metadata.py
@@ -0,0 +1,9 @@
+from __future__ import annotations
+
+from typing import TypedDict
+
+
+class GlobalMetadata(TypedDict):
+    history: str | None
+    source: str | None
+    time_encoding: dict | None  # to read from ds.time.encoding
diff --git a/icclim/models/index_config.py b/icclim/models/index_config.py
index 94349372..6267ea31 100644
--- a/icclim/models/index_config.py
+++ b/icclim/models/index_config.py
@@ -1,20 +1,22 @@
 from __future__ import annotations

+import dataclasses
 from typing import Callable

-from icclim.models.cf_variable import CfVariable
-from icclim.models.climate_index import ClimateIndex
-from icclim.models.constants import PR, TAS, TAS_MAX, TAS_MIN
+from icclim.models.climate_variable import ClimateVariable
 from icclim.models.frequency import Frequency
+from icclim.models.logical_link import LogicalLink
 from icclim.models.netcdf_version import NetcdfVersion
 from icclim.models.quantile_interpolation import QuantileInterpolation


+@dataclasses.dataclass
 class IndexConfig:
     """
-    Configuration class for standard indices.
+    DTO class to map icclim.index input to the parameters of the different indicator
+    compute functions.

-    Parameters
+    Attributes
     ----------
     frequency: Frequency
         The expected resampling frequency of the output.
@@ -23,14 +25,11 @@ class IndexConfig:
     save_percentile: bool = False
         On percentile based indices, if True, this saves the percentile in the output
         netcdf.
-    is_percent:
-        On indices resulting in a numbers of days, if True, this converts the results to
-        % of the sampling frequency
     netcdf_version:
         Netcdf version to be used when creating the output
     window:
         On indices relying on a rolling window of days, configure the window width.
-    threshold:
+    scalar_thresholds:
         On indices relying on a threshold, configure the threshold value. Unit less.
         The unit "degC" is added by icclim.
transfer_limit_Mbytes: @@ -42,75 +41,18 @@ class IndexConfig: """ frequency: Frequency - cf_variables: list[CfVariable] - save_percentile: bool = False - is_percent: bool = False - netcdf_version: NetcdfVersion - window: int | None - threshold: list[float] | float | None - transfer_limit_Mbytes: int | None + climate_variables: list[ClimateVariable] + min_spell_length: int | None + rolling_window_width: int | None out_unit: str | None callback: Callable[[int], None] | None - - def __init__( - self, - frequency: Frequency, - netcdf_version: str | NetcdfVersion, - index: ClimateIndex | None, - cf_variables: list[CfVariable], - save_percentile: bool = False, - window_width: int | None = 5, - threshold: list[float] | float | None = None, - out_unit: str | None = None, - interpolation=QuantileInterpolation.MEDIAN_UNBIASED, - callback: Callable[[int], None] | None = None, - ): - self.frequency = frequency - self.cf_variables = cf_variables - self.window = window_width - self.save_percentile = save_percentile - self.is_percent = out_unit == "%" - self.out_unit = out_unit - self.netcdf_version = NetcdfVersion.lookup(netcdf_version) - self.interpolation = interpolation - self.threshold = threshold - self.callback = callback - self.index = index - - @property - def tas(self) -> CfVariable: - tas_vars = list(filter(lambda v: v.name in TAS, self.cf_variables)) - if len(tas_vars) == 1: - return tas_vars[0] - # Otherwise rely on positional guess - return self.cf_variables[0] - - @property - def tasmax(self) -> CfVariable: - tas_max_vars = list(filter(lambda v: v.name in TAS_MAX, self.cf_variables)) - if len(tas_max_vars) == 1: - return tas_max_vars[0] - # Otherwise rely on positional guess - return self.cf_variables[0] - - @property - def tasmin(self) -> CfVariable: - tas_min_vars = list(filter(lambda v: v.name in TAS_MIN, self.cf_variables)) - if len(tas_min_vars) == 1: - return tas_min_vars[0] - # Otherwise rely on positional guess - if len(self.cf_variables) > 1: - # compound indices case - return self.cf_variables[1] - return self.cf_variables[0] - - @property - def pr(self) -> CfVariable: - pr_vars = list(filter(lambda v: v.name in PR, self.cf_variables)) - if len(pr_vars) == 1: - return pr_vars[0] - # Otherwise rely on positional guess - if len(self.cf_variables) > 1: - # compound indices case - return self.cf_variables[1] - return self.cf_variables[0] + netcdf_version: NetcdfVersion + save_thresholds: bool + interpolation: QuantileInterpolation + is_compared_to_reference: bool + reference_period: tuple[str, str] | None + indicator_name: str + logical_link: LogicalLink + coef: float | None + date_event: bool + sampling_method: str diff --git a/icclim/models/index_group.py b/icclim/models/index_group.py index cf4a4073..1538333e 100644 --- a/icclim/models/index_group.py +++ b/icclim/models/index_group.py @@ -1,34 +1,32 @@ from __future__ import annotations -from enum import Enum +import dataclasses from typing import Any +from icclim.models.registry import Registry -class IndexGroup(Enum): - TEMPERATURE = "temperature" - HEAT = "heat" - COLD = "cold" - DROUGHT = "drought" - RAIN = "rain" - SNOW = "snow" - COMPOUND = "compound" - WILD_CARD_GROUP = "all" # no index bound to it - - @staticmethod - def lookup(query: str | IndexGroup) -> IndexGroup: - if isinstance(query, IndexGroup): - return query - for gr in IndexGroup: - if gr.value.upper() == query.upper(): - return gr - valid_values = list(map(lambda x: x.value, IndexGroup)) - raise NotImplementedError( - f"IndexGroup must be one of the following: 
{valid_values}," - f" but query was {query}." - ) + +@dataclasses.dataclass +class IndexGroup: + name: str def get_indices(self) -> list[Any]: - # import locally to avoid circular dependency (an index has already a group) - from icclim.ecad.ecad_indices import EcadIndex + # import locally to avoid circular dependency (an index has a IndexGroup) + from icclim.ecad.ecad_indices import EcadIndexRegistry + + return list(filter(lambda i: i.group == self, EcadIndexRegistry.values())) + + +class IndexGroupRegistry(Registry): + _item_class = IndexGroup - return list(filter(lambda i: i.group == self, EcadIndex)) + TEMPERATURE = IndexGroup("temperature") + HEAT = IndexGroup("heat") + COLD = IndexGroup("cold") + DROUGHT = IndexGroup("drought") + RAIN = IndexGroup("rain") + SNOW = IndexGroup("snow") + COMPOUND = IndexGroup("compound") + GENERIC = IndexGroup("generic") + # no climate index should be bounded to "all" + WILD_CARD_GROUP = IndexGroup("all") diff --git a/icclim/models/logical_link.py b/icclim/models/logical_link.py new file mode 100644 index 00000000..f5257b8e --- /dev/null +++ b/icclim/models/logical_link.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +import dataclasses +from functools import reduce +from typing import Callable + +import numpy as np +from xarray import DataArray + +from icclim.models.registry import Registry + + +@dataclasses.dataclass +class LogicalLink: + name: str + compute: Callable[[list[DataArray]], DataArray] + + def __call__(self, *args, **kwargs) -> DataArray: + return self.compute(*args, **kwargs) + + +class LogicalLinkRegistry(Registry): + _item_class = LogicalLink + + LOGICAL_OR = LogicalLink( + "or", lambda data_list: reduce(np.logical_or, data_list) # type:ignore + ) + LOGICAL_AND = LogicalLink( + "and", lambda data_list: reduce(np.logical_and, data_list) # type:ignore + ) diff --git a/icclim/models/netcdf_version.py b/icclim/models/netcdf_version.py index 1d96e09e..305a540c 100644 --- a/icclim/models/netcdf_version.py +++ b/icclim/models/netcdf_version.py @@ -1,21 +1,19 @@ from __future__ import annotations -from enum import Enum +import dataclasses -from icclim.icclim_exceptions import InvalidIcclimArgumentError +from icclim.models.registry import Registry -class NetcdfVersion(Enum): - NETCDF4 = "NETCDF4" - NETCDF4_CLASSIC = "NETCDF4_CLASSIC" - NETCDF3_CLASSIC = "NETCDF3_CLASSIC" - NETCDF3_64BIT = "NETCDF3_64BIT" +@dataclasses.dataclass +class NetcdfVersion: + name: str - @staticmethod - def lookup(query: str | NetcdfVersion): - if isinstance(query, NetcdfVersion): - return query - for version in NetcdfVersion: - if version.name.upper() == query.upper(): - return version - raise InvalidIcclimArgumentError(f"Unknown netcdf version {query}") + +class NetcdfVersionRegistry(Registry): + _item_class = NetcdfVersion + + NETCDF4 = NetcdfVersion("NETCDF4") + NETCDF4_CLASSIC = NetcdfVersion("NETCDF4_CLASSIC") + NETCDF3_CLASSIC = NetcdfVersion("NETCDF3_CLASSIC") + NETCDF3_64BIT = NetcdfVersion("NETCDF3_64BIT") diff --git a/icclim/models/operator.py b/icclim/models/operator.py new file mode 100644 index 00000000..6a4f99d8 --- /dev/null +++ b/icclim/models/operator.py @@ -0,0 +1,89 @@ +from __future__ import annotations + +import dataclasses +from typing import Callable + +from xarray import DataArray + +from icclim.icclim_exceptions import InvalidIcclimArgumentError +from icclim.models.registry import Registry + + +def _reach_err(_, __): + # can't raise error in lambda + raise InvalidIcclimArgumentError( + "Reach operator can't be called. 
Try to fill threshold with an operand" + " (e.g. '>=' in '>= 22 degC')." + ) + + +@dataclasses.dataclass +class Operator: + short_name: str + long_name: str + standard_name: str + operand: str + compute: Callable[[DataArray, DataArray | int | float], DataArray] + aliases: list[str] + + def __call__(self, *args, **kwargs): + return self.compute(*args, **kwargs) + + +class OperatorRegistry(Registry): + _item_class = Operator + + @staticmethod + def get_item_aliases(op) -> list[str]: + return list(map(str.upper, op.aliases)) + + GREATER = Operator( + short_name="gt", + long_name="greater than", + standard_name="greater_than", + aliases=["gt", ">"], + operand=">", + compute=lambda da, th: da > th, # noqa + ) + LOWER = Operator( + short_name="lt", + long_name="lower than", + standard_name="lower_than", + aliases=["lt", "<"], + operand="<", + compute=lambda da, th: da < th, # noqa + ) + GREATER_OR_EQUAL = Operator( + short_name="get", + long_name="greater or equal to", + standard_name="greater_or_equal_to", + aliases=["get", "ge", ">=", "=>"], + operand=">=", + compute=lambda da, th: da >= th, # noqa + ) + LOWER_OR_EQUAL = Operator( + short_name="let", + long_name="lower or equal to", + standard_name="lower_or_equal_to", + aliases=["let", "le", "<=", "=<"], + operand="<=", + compute=lambda da, th: da <= th, # noqa + ) + EQUAL = Operator( + short_name="e", + long_name="equal to", + standard_name="equal_to", + aliases=["e", "equal", "eq", "=", "=="], + operand="==", + compute=lambda da, th: da == th, # noqa + ) + # A None operand means the threshold is reached and a reducer specific computation + # is done. Case of excess and deficit (a.k.a gd4, hd17) + REACH = Operator( + short_name="reach", + long_name="", # nothing + standard_name="reaching", + aliases=["r"], + operand="reach", + compute=_reach_err, + ) diff --git a/icclim/models/quantile_interpolation.py b/icclim/models/quantile_interpolation.py index cf4a3b65..286f18fa 100644 --- a/icclim/models/quantile_interpolation.py +++ b/icclim/models/quantile_interpolation.py @@ -1,23 +1,19 @@ from __future__ import annotations -from enum import Enum +import dataclasses +from icclim.models.registry import Registry -class QuantileInterpolation(Enum): - LINEAR = ("linear", 1, 1) - MEDIAN_UNBIASED = ("hyndman_fan", 1.0 / 3, 1.0 / 3) - def __init__(self, alias, alpha, beta): - self.alias = alias - self.alpha = alpha - self.beta = beta +@dataclasses.dataclass +class QuantileInterpolation: + name: str + alpha: float + beta: float - @staticmethod - def lookup(s: str): - for interpolation in QuantileInterpolation: - if interpolation.value.upper() == s.upper(): - return interpolation - valid_values = list(map(lambda x: x.value, QuantileInterpolation)) - raise NotImplementedError( - f"Interpolation must be one of the following: {valid_values}" - ) + +class QuantileInterpolationRegistry(Registry): + _item_class = QuantileInterpolation + + LINEAR = QuantileInterpolation("linear", 1, 1) + MEDIAN_UNBIASED = QuantileInterpolation("median_unbiased", 1.0 / 3, 1.0 / 3) diff --git a/icclim/models/registry.py b/icclim/models/registry.py new file mode 100644 index 00000000..c9704f08 --- /dev/null +++ b/icclim/models/registry.py @@ -0,0 +1,55 @@ +from __future__ import annotations + +from copy import deepcopy + +from icclim.icclim_exceptions import InvalidIcclimArgumentError + + +class Registry: + """This class is a fancy enum to easily store and find constant items of + similar type. + It acts as a namespace so there is no need to instantiate it or any of + its subclasses. 
+ + Notes + ----- + Registries are not meant to store large collections, they are just fancy lookup + tables for items with aliases and no case sensitivity. + """ + + _item_class: type + + def __init__(self): + raise NotImplementedError("Don't instantiate Registry, use its class methods.") + + @classmethod + def lookup(cls, query: _item_class | str, no_error: bool = False) -> _item_class: + if isinstance(query, cls._item_class): + return query + q = query.upper() + for key, item in cls.catalog().items(): + if q == key.upper() or q in cls.get_item_aliases(item): + return deepcopy(item) + if no_error: + return None + raise InvalidIcclimArgumentError( + f"Unknown {cls._item_class.__qualname__}: '{query}'. " + f"Use one of {cls.all_aliases()}." + ) + + @classmethod + def all_aliases(cls) -> list[_item_class]: + return list(map(cls.get_item_aliases, list(cls.catalog().values()))) + + @staticmethod + def get_item_aliases(item: _item_class) -> list[str]: + """Should be overridden.""" + return [item.name.upper()] + + @classmethod + def catalog(cls) -> dict[str, _item_class]: + return {k: v for k, v in cls.__dict__.items() if isinstance(v, cls._item_class)} + + @classmethod + def values(cls) -> list[_item_class]: + return [v for k, v in cls.__dict__.items() if isinstance(v, cls._item_class)] diff --git a/icclim/models/standard_index.py b/icclim/models/standard_index.py new file mode 100644 index 00000000..b497aa57 --- /dev/null +++ b/icclim/models/standard_index.py @@ -0,0 +1,77 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, Callable, Optional, Sequence, Tuple, Union + +from xarray import DataArray + +from icclim.generic_indices.cf_var_metadata import StandardVariable +from icclim.models.index_group import IndexGroup + +ComputeIndexFun = Callable[ + [Any], Union[DataArray, Tuple[DataArray, Optional[DataArray]]] +] + + +@dataclass +class StandardIndex: + """Standard Index data class. + It is used to describe how a GenericIndicator should be setup to compute a climate + index that has been defined in the literature (such as ECA&D's ATBD document). + + + Attributes + ---------- + + short_name: str + The index name used in the output. + compute: Callable + The function to compute the index. It usually wraps a xclim functions. + group: IndexGroup + The index group category. + variables: List[List[str]] + The Cf variables needed to compute the index. + The variable are individually described by a list of aliases. + qualifiers: List[str] | None + ``optional`` List of configuration to compute the index. + Used internally to generate modules for C3S. + source: str | None + Where the index definition comes from. + definition: str | None + A formal definition of the index. It should describe what kind of output + the user is expected to obtain. + """ + + short_name: str + group: IndexGroup + input_variables: list[StandardVariable] | None # None when index is generic + generic_indicator: Any # Any -> GenericIndicator + # todo: merge qualifiers with group into a Set of qualifiers ? 
+ qualifiers: list[str] | None = None + source: str | None = None + reference: str | None = None + definition: str | None = None + threshold: str | None | Any | Sequence[str | Any] = None # Any -> Threshold + output_unit: str | None = None + # additional, index specific args + rolling_window_width: int | None = None + doy_window_width: int | None = None + min_spell_length: int | None = None + + def __str__(self): + return f"{self.group} | {self.short_name} | {self.definition}" + + def __call__(self, *args, **kwargs): + self.generic_indicator(*args, **kwargs) + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, StandardIndex): + return False + return ( + self.generic_indicator.name == other.generic_indicator.name + and self.threshold == other.threshold + and self.output_unit == other.output_unit + and self.rolling_window_width == other.rolling_window_width + and self.doy_window_width == other.doy_window_width + and self.min_spell_length == other.min_spell_length + ) diff --git a/icclim/models/threshold.py b/icclim/models/threshold.py new file mode 100644 index 00000000..d616edfb --- /dev/null +++ b/icclim/models/threshold.py @@ -0,0 +1,343 @@ +from __future__ import annotations + +from datetime import datetime +from functools import partial +from typing import Any, Callable, Sequence, Union + +import numpy as np +import xarray as xr +from xarray import DataArray, Dataset +from xclim.core.calendar import build_climatology_bounds, percentile_doy +from xclim.core.units import convert_units_to +from xclim.core.utils import PercentileDataArray, calc_perc + +from icclim.icclim_exceptions import InvalidIcclimArgumentError +from icclim.models.constants import ( + DOY_COORDINATE, + DOY_PERCENTILE_UNIT, + PERIOD_PERCENTILE_UNIT, + UNITS_ATTRIBUTE_KEY, +) +from icclim.models.frequency import Frequency +from icclim.models.operator import Operator, OperatorRegistry +from icclim.models.quantile_interpolation import ( + QuantileInterpolation, + QuantileInterpolationRegistry, +) +from icclim.pre_processing.input_parsing import ( + build_reference_da, + is_dataset_path, + read_dataset, + read_string_threshold, + read_threshold_DataArray, +) +from icclim.utils import is_number_sequence + +ThresholdValueType = Union[ + str, float, int, Dataset, DataArray, Sequence[Union[float, int, str]], None +] + +# TODO: [refacto] a model file/class should not have that much logic, +# move stuff to a thresholdFactory or something similar + + +class Threshold: + """ + - scalar thresh: "> 25 ºC" + - per grid cell thresh: "> data.nc" + - doy percentile threshold: "> 98th doy_per" + - period percentile threshold: "> 75th period_per" + - period percentile threshold with min value: "> 98th period_per", min_value= "1mm" + - sequence thresholds (or): "> 10 ºC, > 25 ºC" + thresholds are a new dimension + """ + + operator: Operator + value: DataArray | Callable[[Frequency, DataArray], PercentileDataArray] + threshold_var_name: str | None # may be guessed if missing + + # -- Percentile specific properties: + reference_period: Sequence[str] + doy_window_width: int + only_leap_years: bool + interpolation: QuantileInterpolation + threshold_min_value: Threshold + + @property + def unit(self) -> str | None: + if isinstance(self.value, Callable): + return None + return self.value.attrs[UNITS_ATTRIBUTE_KEY] + + @unit.setter + def unit(self, unit): + if not isinstance(self.value, Callable): + if self.value.attrs.get(UNITS_ATTRIBUTE_KEY, None) is not None: + self.value = convert_units_to(self.value, unit) + 
self.value.attrs[UNITS_ATTRIBUTE_KEY] = unit + if self.threshold_min_value: + self.threshold_min_value.unit = unit + + def __init__( + self, + query: str | None | Operator = None, + value: ThresholdValueType = None, + unit: str | None = None, + threshold_var_name: str | None = None, + doy_window_width: int = 5, + only_leap_years: bool = False, + interpolation: str | QuantileInterpolation = "median_unbiased", + reference_period: Sequence[datetime | str] | None = None, + threshold_min_value: str | float | None = None, + ): + is_doy_per_threshold = False + if isinstance(query, str) and value is None and unit is None: + operator, unit, value = read_string_threshold(query) + self.initial_query = query + else: + operator = query + self.initial_query = None + if isinstance(interpolation, str): + interpolation = QuantileInterpolationRegistry.lookup(interpolation) + if is_dataset_path(value) or isinstance(value, Dataset): + # e.g. Threshold(">", "thresh*.nc" , "degC") + ds = ( + value + if isinstance(value, Dataset) + else read_dataset(value, standard_var=None) + ) + _check_threshold_var_name(threshold_var_name) + value = read_threshold_DataArray( + ds[threshold_var_name], + threshold_min_value=threshold_min_value, + climatology_bounds=reference_period, + unit=unit, + ) + if DOY_COORDINATE in value.coords: + is_doy_per_threshold = True + elif is_number_sequence(value): + # e.g. Threshold(">", [2,3,4], "degC") + value = DataArray(data=value) + elif unit == DOY_PERCENTILE_UNIT: + value = partial( + build_doy_per, + per_val=float(value), + reference_period=reference_period, + interpolation=interpolation, + only_leap_years=only_leap_years, + doy_window_width=doy_window_width, + percentile_min_value=threshold_min_value, + ) + is_doy_per_threshold = True + elif unit == PERIOD_PERCENTILE_UNIT: + value = partial( + build_period_per, + per_val=float(value), + reference_period=reference_period, + interpolation=interpolation, + only_leap_years=only_leap_years, + percentile_min_value=threshold_min_value, + ) + elif isinstance(value, (float, int)): + value = DataArray(data=value) + elif isinstance(value, DataArray): + # nothing to do + ... 
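+        # Dispatch summary: "> 25 degC" yields a scalar DataArray; a number
+        # sequence yields a DataArray along a new dimension; queries in
+        # DOY_PERCENTILE_UNIT or PERIOD_PERCENTILE_UNIT yield a lazy
+        # partial(build_doy_per, ...) or partial(build_period_per, ...).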
+ else: + raise NotImplementedError( + f"Threshold could not be built from the {type(value)}" + ) + self.is_doy_per_threshold = is_doy_per_threshold + self.operator = ( + OperatorRegistry.lookup(operator, no_error=True) or OperatorRegistry.REACH + ) + self.value = value + self.threshold_min_value = ( + Threshold(threshold_min_value) if threshold_min_value else None + ) + self.unit = unit + self.threshold_var_name = threshold_var_name + self.doy_window_width = doy_window_width + self.only_leap_years = only_leap_years + self.interpolation = interpolation + self.reference_period = reference_period + + def __eq__(self, other): + return ( + isinstance(other, Threshold) + and self.initial_query == other.initial_query + and self.doy_window_width == other.doy_window_width + and self.only_leap_years == other.only_leap_years + and self.interpolation == other.interpolation + and self.reference_period == other.reference_period + and self.unit == other.unit + and self.threshold_min_value == other.threshold_min_value + ) + + def get_metadata( + self, + src_freq: Frequency, + must_run_bootstrap: bool = False, + indicator_name: str | None = None, + ) -> dict[str, Any]: + # TODO: [xclim backport] localize/translate these with templates + additional_metadata = [] + if self.value.size == 1: + res = { + "standard_name": f"{self.operator.standard_name}" + f"_threshold", # not cf + "long_name": f"{self.operator.long_name}" + f" {self.value.values[()]}" + f" {self.unit}", + "short_name": f"{self.operator.short_name}_threshold", + } + elif isinstance(self.value, PercentileDataArray): + percentiles = self.value.coords["percentiles"].values + bds = self.value.attrs.get("climatology_bounds") + if self.is_doy_per_threshold: + if percentiles.size == 1: + display_perc = f"{percentiles[0]}th day of year percentile" + standard_name = "doy_percentile_threshold" + short_name = "doy_per_threshold" + else: + display_perc = str(list(map(lambda x: f"{x}th", percentiles))) + standard_name = "_doy_percentile_thresholds" + short_name = "_doy_per_thresholds" + window = self.value.attrs.get("window", "") + additional_metadata.append( + f"day of year percentiles were computed per grid cell, on the {bds}" + f" period, with a {window} {src_freq.units} centred window to" + f" aggregate values around each day of year" + ) + if must_run_bootstrap: + additional_metadata.append( + "the bootstrap algorithm has been applied to compute doy" + " percentiles for the period overlapping both the reference" + " period and the studied period" + ) + else: + if percentiles.size == 1: + display_perc = f"{percentiles[0]}th period percentile" + standard_name = "period_percentile_threshold" + short_name = "period_per_threshold" + else: + display_perc = ( + str(list(map(lambda x: f"{x}th", percentiles))) + + " period percentiles" + ) + standard_name = "_period_percentile_thresholds" + short_name = "period_per_thresholds" + additional_metadata.append( + f"period percentiles were computed per grid cell, on the {bds}" + " period" + ) + res = { + "standard_name": f"{self.operator.standard_name}_{standard_name}", + "long_name": f"{self.operator.long_name} {display_perc}", + "short_name": short_name, + } + elif isinstance(self.value, DataArray): + if self.value.size < 10: + display_value = f"{self.value.values} {self.unit}" + else: + display_value = ( + f"per grid cell values between" + f" {np.format_float_positional(self.value.min().values[()], 3)}" + f" {self.unit}" + f" and {np.format_float_positional(self.value.max().values[()], 3)}" + f" {self.unit}" + ) + 
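+            # same metadata keys as the scalar and percentile branches,
+            # with plural names for the per-grid-cell case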
res = { + "standard_name": f"{self.operator.standard_name}_thresholds", + "long_name": f"{self.operator.long_name} {display_value}", + "short_name": f"{self.operator.short_name}_thresholds", + } + else: + raise NotImplementedError( + f"Threshold::value must be a DataArray. It was a {type(self.value)}." + ) + if self.threshold_min_value: + min_t = self.threshold_min_value.get_metadata(src_freq, False) + additional_metadata.append( + f"only values {min_t['long_name']} were considered" + ) + if len(additional_metadata) > 0: + added_meta = map(lambda s: s.capitalize(), additional_metadata) + added_meta = "(" + (". ".join(added_meta)) + ")" + res.update({"additional_metadata": added_meta}) + return res + + +def _check_threshold_var_name(threshold_var_name: str | None) -> None: + if threshold_var_name is None: + raise InvalidIcclimArgumentError( + "When threshold is a Dataset, " + "threshold_var_name must be given to " + "find the data_variable in the " + "dataset." + ) + + +def build_period_per( + per_val: float, + reference_period: Sequence[str], + interpolation: QuantileInterpolation, + only_leap_years: bool, + studied_data: DataArray, + percentile_min_value: float | None, +) -> PercentileDataArray: + reference = build_reference_da( + studied_data, + reference_period, + only_leap_years, + percentile_min_value=percentile_min_value, + ) + computed_per = xr.apply_ufunc( + calc_perc, + reference, + input_core_dims=[["time"]], + output_core_dims=[["percentiles"]], + keep_attrs=True, + kwargs=dict( + percentiles=[per_val], + alpha=interpolation.alpha, + beta=interpolation.beta, + copy=True, + ), + dask="parallelized", + output_dtypes=[reference.dtype], + dask_gufunc_kwargs=dict(output_sizes={"percentiles": 1}, allow_rechunk=True), + ) + computed_per = computed_per.assign_coords( + percentiles=xr.DataArray([per_val], dims=("percentiles",)) + ) + res = PercentileDataArray.from_da( + source=computed_per, + climatology_bounds=build_climatology_bounds(reference), + ) + return res + + +def build_doy_per( + per_val: float, + reference_period: Sequence[str], + interpolation: QuantileInterpolation, + only_leap_years: bool, + doy_window_width: int, + studied_data: DataArray, + percentile_min_value: float | None, +) -> PercentileDataArray: + reference = build_reference_da( + studied_data, + reference_period, + only_leap_years, + percentile_min_value, + ) + res = percentile_doy( + arr=reference, + window=doy_window_width, + per=per_val, + alpha=interpolation.alpha, + beta=interpolation.beta, + ).compute() # "optimization" (diminish dask scheduler workload) + return res diff --git a/icclim/models/user_index_config.py b/icclim/models/user_index_config.py deleted file mode 100644 index 9cd822ca..00000000 --- a/icclim/models/user_index_config.py +++ /dev/null @@ -1,194 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from enum import Enum -from typing import Any, Callable, Literal - -from xarray.core.dataarray import DataArray -from xclim.core.calendar import select_time - -from icclim.icclim_exceptions import InvalidIcclimArgumentError -from icclim.models.frequency import Frequency -from icclim.models.index_config import CfVariable -from icclim.utils import get_date_to_iso_format - -LogicalOperationLiteral = Literal[ - "gt", - ">", - "lt", - "<", - "get", - "ge", - ">=", - "=>", - "let", - "le", - "<=", - "=<", - "e", - "equal", - "eq", - "=", - "==", -] - - -class LinkLogicalOperation(Enum): - OR_STAMP = "or" - AND_STAMP = "and" - - @staticmethod - def lookup(query: str) -> 
LinkLogicalOperation: - for mode in LinkLogicalOperation: - if query.upper == mode.value.upper(): - return mode - raise InvalidIcclimArgumentError( - f"Unknown link_logical_operation mode {query}." - f"Use one of {[linkOp.value for linkOp in LinkLogicalOperation]}." - ) - - -class ExtremeMode(Enum): - MIN = "min" - MAX = "max" - - @staticmethod - def lookup(query: str) -> ExtremeMode: - for mode in ExtremeMode: - if query.upper() == mode.value.upper(): - return mode - raise InvalidIcclimArgumentError( - f"Unknown extreme_mode {query}." - f" Use one of {[mode.value for mode in ExtremeMode]}." - ) - - -class LogicalOperation(Enum): - GREATER_THAN = (["gt", ">"], ">", lambda da, th: da > th) - LOWER_THAN = (["lt", "<"], "<", lambda da, th: da < th) - GREATER_OR_EQUAL_THAN = (["get", "ge", ">=", "=>"], ">=", lambda da, th: da >= th) - LOWER_OR_EQUAL_THAN = (["let", "le", "<=", "=<"], "<=", lambda da, th: da <= th) - EQUAL = (["e", "equal", "eq", "=", "=="], "==", lambda da, th: da == th) - - def __init__( - self, - aliases: str, - operator: str, - compute: Callable[[DataArray, DataArray | float | int], DataArray], - ) -> None: - super().__init__() - self.aliases = aliases - self.operator = operator - self.compute = compute - - @staticmethod - def lookup(query: str) -> LogicalOperation: - for op in LogicalOperation: - if query.upper() in map(str.upper, op.aliases): - return op - raise InvalidIcclimArgumentError( - f"Unknown logical operator {query}." - f"Use one of {[op.aliases for op in LogicalOperation]}." - ) - - -@dataclass -class NbEventConfig: - logical_operation: list[LogicalOperation] - thresholds: list[float | str] - link_logical_operations: LinkLogicalOperation | None = None - data_arrays: list[CfVariable] | None = None - - -@dataclass -class UserIndexConfig: - index_name: str - calc_operation: str - cf_vars: list[CfVariable] - freq: Frequency - date_event: bool - is_percent: bool - logical_operation: LogicalOperation | None = None - thresh: float | int | str | list[float | int | str] | None = None - link_logical_operations: LinkLogicalOperation | None = None - extreme_mode: ExtremeMode | None = None - window_width: int | None = None - coef: float | None = None - var_type: str | None = None - nb_event_config: NbEventConfig | None = None - save_percentile: bool = False - - def __init__( - self, - index_name: str, - # Any should be CalcOperation but it causes circular import - calc_operation: str | Any, - freq: Frequency, - cf_vars: list[CfVariable], - logical_operation: str = None, - thresh=None, - link_logical_operations: str = None, - extreme_mode: str = None, - window_width=None, - coef=None, - date_event=None, - var_type=None, - is_percent=False, - save_percentile=False, - ref_time_range: list[str] = None, - ) -> None: - self.index_name = index_name - self.calc_operation = calc_operation - self.freq = freq - if logical_operation is not None: - self.logical_operation = LogicalOperation.lookup(logical_operation) - self.thresh = thresh - if extreme_mode is not None: - self.extreme_mode = ExtremeMode.lookup(extreme_mode) - self.window_width = window_width - self.coef = coef - self.date_event = date_event - self.var_type = var_type - self.is_percent = is_percent - if freq.indexer is not None: - for cf_var in cf_vars: - cf_var.study_da = select_time(cf_var.study_da, **freq.indexer) - cf_var.reference_da = select_time(cf_var.reference_da, **freq.indexer) - self.cf_vars = cf_vars - if thresh is not None and logical_operation is not None: - self.nb_event_config = get_nb_event_conf( - 
logical_operation, link_logical_operations, thresh, cf_vars
-            )
-        self.save_percentile = save_percentile
-        if (rtr := ref_time_range) is not None:
-            rtr = [get_date_to_iso_format(date) for date in rtr]
-            for cf_var in cf_vars:
-                cf_var.reference_da = cf_var.study_da.sel(time=slice(rtr[0], rtr[1]))
-
-
-def get_nb_event_conf(
-    logical_operation: list[str] | str,
-    link_logical_operations: str | None,
-    thresholds: list[str | float] | float | str,
-    cfvars: list[CfVariable],
-) -> NbEventConfig:
-    if not isinstance(thresholds, list):
-        threshold_list = [thresholds]
-    else:
-        threshold_list = thresholds
-    if isinstance(logical_operation, list):
-        logical_operations = list(map(LogicalOperation.lookup, logical_operation))
-    else:
-        logical_operations = [LogicalOperation.lookup(logical_operation)]
-    if link_logical_operations is not None:
-        link_logical_operation_list = LinkLogicalOperation.lookup(
-            link_logical_operations
-        )
-    else:
-        link_logical_operation_list = None
-    return NbEventConfig(
-        logical_operation=logical_operations,
-        link_logical_operations=link_logical_operation_list,
-        thresholds=threshold_list,
-        data_arrays=cfvars,
-    )
diff --git a/icclim/models/user_index_dict.py b/icclim/models/user_index_dict.py
index 4704fb6e..b2bddb71 100644
--- a/icclim/models/user_index_dict.py
+++ b/icclim/models/user_index_dict.py
@@ -1,23 +1,25 @@
 from __future__ import annotations
 
 import datetime
-from typing import Literal, TypedDict
+from typing import Literal, Sequence, TypedDict
 
-from icclim.models.user_index_config import LogicalOperationLiteral
-from icclim.user_indices.calc_operation import CalcOperation, CalcOperationLiteral
+from icclim.models.logical_link import LogicalLink
+from icclim.user_indices.calc_operation import CalcOperation, CalcOperationLike
 
 
 class UserIndexDict(TypedDict, total=False):
     index_name: str
-    calc_operation: CalcOperationLiteral | CalcOperation
-    logical_operation: LogicalOperationLiteral | None
-    thresh: str | float | None
-    link_logical_operations: Literal["and", "or"] | None
+    calc_operation: CalcOperationLike | CalcOperation
+    logical_operation: str | None | Sequence[str]  # >= | <= | ... | ==
+    thresh: str | float | int | Sequence[str] | Sequence[float] | Sequence[int] | None
     extreme_mode: Literal["min", "max"] | None
-    window_width: int | None
+
+    link_logical_operations: Literal["and", "or"] | LogicalLink | None
     coef: float | None
-    date_event: bool | None
+    date_event: bool
     var_type: Literal["t", "p"] | None
+    window_width: int | None
     ref_time_range: list[datetime] | list[str] | tuple[str, str] | None
-    # deprecated
+
+    # -- deprecated
     indice_name: str | None
diff --git a/icclim/pre_processing/in_file_dictionary.py b/icclim/pre_processing/in_file_dictionary.py
new file mode 100644
index 00000000..2d66297e
--- /dev/null
+++ b/icclim/pre_processing/in_file_dictionary.py
@@ -0,0 +1,26 @@
+from __future__ import annotations
+
+from typing import TypedDict
+
+from icclim.icclim_types import InFileBaseType
+from icclim.models.threshold import Threshold
+
+
+class InFileDictionary(TypedDict, total=False):
+    """Dictionary grouping in_files and var_name functionalities.
+    It also allows using a different input for thresholds, such as percentiles.
+
+    Examples
+    --------
+
+    >>> in_files = {
+    ...     "tasmax": {"study": "tasmax-store.zarr",
+    ...                "thresholds": Threshold(">", ["per-1.nc", "per-2.nc"]),
+    ...     },
+    ...     "pr": "pr.nc",
+    ...     "tasmin": {"study": "tasmin.nc"},
+    ...
} + """ + + study: InFileBaseType + thresholds: Threshold | None diff --git a/icclim/pre_processing/input_parsing.py b/icclim/pre_processing/input_parsing.py index e5c66309..1c389178 100644 --- a/icclim/pre_processing/input_parsing.py +++ b/icclim/pre_processing/input_parsing.py @@ -1,181 +1,105 @@ from __future__ import annotations -from typing import Callable, Dict, List, TypedDict, Union +import re +from datetime import datetime +from typing import Hashable, Sequence +import numpy as np import xarray as xr import xclim from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset +from xclim.core.units import convert_units_to from xclim.core.utils import PercentileDataArray -from icclim.ecad.ecad_indices import EcadIndex +from icclim.generic_indices.cf_var_metadata import ( + StandardVariable, + StandardVariableRegistry, +) from icclim.icclim_exceptions import InvalidIcclimArgumentError -from icclim.models.cf_calendar import CfCalendar -from icclim.models.cf_variable import CfVariable -from icclim.models.climate_index import ClimateIndex -from icclim.models.constants import VALID_PERCENTILE_DIMENSION -from icclim.models.frequency import Frequency -from icclim.models.index_group import IndexGroup +from icclim.icclim_types import InFileBaseType +from icclim.models.cf_calendar import CfCalendarRegistry +from icclim.models.constants import UNITS_ATTRIBUTE_KEY, VALID_PERCENTILE_DIMENSION +from icclim.models.index_group import IndexGroup, IndexGroupRegistry +from icclim.models.standard_index import StandardIndex from icclim.utils import get_date_to_iso_format -# zarr or netcdf, or list of netcdf or xarray struct -InFileBaseType = Union[str, List[str], Dataset, DataArray] - - -class InFileDictionary(TypedDict, total=False): - """Dictionary grouping in_files and var_name functionnalities. - It also allows to use a different input for percentiles. - - Examples - -------- - - >>> in_files = { - ... "tasmax": { "study": "tasmax-store.zarr", - ... "thresholds": ["per-1.nc", "per-2.nc"], - ... "climatology_bounds":['1990-01-01', '1991-12-31'], - ... "per_var_name":"tas_max_per" }, - ... "pr": "pr.nc", - ... "tasmin": {"study": "tasmin.nc"}, - ... } - """ - - study: InFileBaseType - thresholds: InFileBaseType | None - climatology_bounds: tuple[str, str] | list[str] | None # may be guessed if missing - per_var_name: str | None # may be guessed if missing - - -InFileType = Union[Dict[str, Union[InFileDictionary, InFileBaseType]], InFileBaseType] +DEFAULT_INPUT_FREQUENCY = "days" def guess_var_names( ds: Dataset, - in_data: InFileType | None = None, - index: ClimateIndex | None = None, - var_names: str | list[str] | None = None, -) -> list[str]: - if isinstance(in_data, dict): - # case InFileDictionary - if var_names is not None: - raise InvalidIcclimArgumentError( - "When `in_files` is a dictionary, `var_name` must be empty." - " The dictionary's keys are the expected variable names." - ) - return list(in_data.keys()) - elif var_names is None: - if index is None: - raise InvalidIcclimArgumentError( - "Unable to guess variable name." 
" Provide one using `var_name`" - ) - return _guess_dataset_var_names(index, ds) + var_names: str | Sequence[str] | None, + standard_index: StandardIndex | None, +) -> list[Hashable]: + if var_names is None: + return _guess_dataset_var_names(ds=ds, standard_index=standard_index) elif isinstance(var_names, str): return [var_names] - elif isinstance(var_names, list): + elif isinstance(var_names, (list, tuple)): return var_names else: raise NotImplementedError("`var_name` must be a string a list or None.") def read_dataset( - in_data: InFileType, - index: EcadIndex = None, - var_name: str | list[str] = None, # used only if input is a DataArray + in_files: InFileBaseType, + standard_var: StandardVariable | None = None, + var_name: str | Sequence[str] = None, ) -> Dataset: - if isinstance(in_data, dict): - return _read_dictionary(in_data, index) - elif isinstance(in_data, Dataset): - return in_data - elif isinstance(in_data, DataArray): - return _read_dataarray(in_data, index, var_name=var_name) - elif isinstance(in_data, list): + if isinstance(in_files, Dataset): + ds = in_files + elif isinstance(in_files, DataArray): + ds = _read_dataarray(in_files, standard_var=standard_var, var_name=var_name) + elif is_glob_path(in_files) or ( + isinstance(in_files, (list, tuple)) and is_netcdf_path(in_files[0]) + ): # we assumes it's a list of netCDF files # join="override" is used for cases some dimension are a tiny bit different # in different files (was the case with eobs). - return xr.open_mfdataset(in_data, parallel=True, join="override") - elif is_netcdf(in_data): - return xr.open_dataset(in_data) - elif is_zarr(in_data): - return xr.open_zarr(in_data) + ds = xr.open_mfdataset(in_files, parallel=True, join="override") + elif is_netcdf_path(in_files): + ds = xr.open_dataset(in_files) + elif is_zarr_path(in_files): + ds = xr.open_zarr(in_files) + elif isinstance(in_files, (list, tuple)): + return xr.merge( + [ + read_dataset(in_file, standard_var, var_name[i]) + for i, in_file in enumerate(in_files) + ] + ) else: - raise NotImplementedError("`in_files` format was not recognized.") - - -def build_cf_variables( - var_names: list[str], - ds: Dataset, - time_range: list[str] | None, - ignore_Feb29th: bool, - base_period_time_range: list[str] | None, - only_leap_years: bool, - freq: Frequency, -) -> list[CfVariable]: - return [ - _build_cf_variable( - ds=ds, - name=var_name, - time_range=time_range, - ignore_Feb29th=ignore_Feb29th, - base_period_time_range=base_period_time_range, - only_leap_years=only_leap_years, - time_clipping=freq.time_clipping, + raise NotImplementedError( + f"`in_files` format {type(in_files)} was not" f" recognized." ) - for var_name in var_names - ] + return update_to_standard_coords(ds) -def update_to_standard_coords(ds: Dataset) -> tuple[Dataset, dict]: +def update_to_standard_coords(ds: Dataset) -> Dataset: """ - Mutate input ds to use more icclim friendly coordinate name. + Mutate input ds to use more icclim friendly coordinate names. 
""" # TODO see if cf-xarray could replace this - revert = {} - if ds.coords.get("latitude") is not None: - ds = ds.rename({"latitude": "lat"}) - revert.update({"lat": "latitude"}) - if ds.coords.get("longitude") is not None: - ds = ds.rename({"longitude": "lon"}) - revert.update({"lon": "longitude"}) if ds.coords.get("t") is not None: ds = ds.rename({"t": "time"}) - revert.update({"time": "t"}) - return ds, revert + return ds -def is_zarr(data: InFileBaseType): - return isinstance(data, str) and ".nc" not in data +def is_zarr_path(path: InFileBaseType) -> bool: + return isinstance(path, str) and ".zarr" in path -def is_netcdf(data: InFileBaseType): - return isinstance(data, str) and ".nc" in data +def is_netcdf_path(path: InFileBaseType) -> bool: + return isinstance(path, str) and ".nc" in path -def _read_dictionary(in_data, index): - ds_acc = [] - for climate_var_name, climate_var_data in in_data.items(): - if isinstance(climate_var_data, dict): - study_ds = read_dataset(climate_var_data["study"], index, climate_var_name) - if climate_var_data.get("thresholds", None) is not None: - ds_acc.append(_read_thresholds(climate_var_data, climate_var_name)) - else: - study_ds = read_dataset(climate_var_data, index, climate_var_name) - ds_acc.append(study_ds) - return xr.merge(ds_acc) - - -def _read_thresholds(climate_var_data: InFileDictionary, climate_var_name: str): - per_ds = read_dataset(climate_var_data["thresholds"], index=None) - per_var_name = _get_percentile_var_name(per_ds, climate_var_data, climate_var_name) - per_da = per_ds[per_var_name].rename(f"{climate_var_name}_thresholds") - per_da = _standardize_percentile_dim_name(per_da) - per_da = PercentileDataArray.from_da( - per_da, - climatology_bounds=_read_clim_bounds(climate_var_data, per_da), - ) - return per_da +def is_glob_path(path: InFileBaseType) -> bool: + return isinstance(path, str) and "*" in path -def _standardize_percentile_dim_name(per_da: DataArray) -> DataArray: - # This function could probably be backported to xclim PercentileDataArray +def standardize_percentile_dim_name(per_da: DataArray) -> DataArray: + # todo [xclim backport] This function could probably be in PercentileDataArray per_dim_name = None for d in VALID_PERCENTILE_DIMENSION: if d in per_da.dims: @@ -185,9 +109,8 @@ def _standardize_percentile_dim_name(per_da: DataArray) -> DataArray: per_dim_name = f"{d}s" if per_dim_name is None: raise InvalidIcclimArgumentError( - "Percentile data must contain a recognizable" - " percentiles dimension such as 'percentiles'," - " 'quantile', 'per' or 'centile'." + "Percentile data must contain a recognizable percentiles dimension such as" + " 'percentiles', 'quantile', 'per' or 'centile'." ) per_da = per_da.rename({per_dim_name: "percentiles"}) if "quantile" in per_dim_name: @@ -195,10 +118,10 @@ def _standardize_percentile_dim_name(per_da: DataArray) -> DataArray: return per_da -def _read_clim_bounds(input_dict: InFileDictionary, per_da: DataArray) -> list[str]: - bds = input_dict.get("climatology_bounds", None) or per_da.attrs.get( - "climatology_bounds", None - ) +def read_clim_bounds( + climatology_bounds: Sequence[str, str], per_da: DataArray +) -> list[str]: + bds = climatology_bounds or per_da.attrs.get("climatology_bounds", None) if len(bds) != 2: raise InvalidIcclimArgumentError( "climatology_bounds must be a iterable of length 2." 
@@ -207,114 +130,93 @@ def _read_clim_bounds(input_dict: InFileDictionary, per_da: DataArray) -> list[s def _read_dataarray( - data: DataArray, index: EcadIndex = None, var_name: str | list[str] = None + data: DataArray, + standard_var: StandardVariable | None = None, + var_name: str | Sequence[str] = None, ) -> Dataset: - if isinstance(var_name, list): + if isinstance(var_name, (tuple, list)): if len(var_name) > 1: raise InvalidIcclimArgumentError( "When the `in_file` is a DataArray, there" - " can only be one value in `var_name`." + f" can only be one value in `var_name` but var_name was: {var_name} " ) else: var_name = var_name[0] - if index is not None: - if len(index.input_variables) > 1: - raise InvalidIcclimArgumentError( - f"Index {index.name} needs {len(index.input_variables)} variables." - f" Please provide them with an xarray.Dataset, netCDF file(s) or a" - f" zarr store." - ) - # first alias of the unique variable - data_name = var_name or index.input_variables[0][0] + data_name = var_name or standard_var.short_name or None else: data_name = var_name or data.name or "unnamed_var" return data.to_dataset(name=data_name, promote_attrs=True) -def _guess_dataset_var_names(index: ClimateIndex, ds: Dataset) -> list[str]: +def _guess_dataset_var_names( + standard_index: StandardIndex, ds: Dataset +) -> list[Hashable]: """Try to guess the variable names using the expected kind of variable for the index. """ def get_error() -> Exception: - main_aliases = ", ".join(map(lambda v: v[0], index_expected_vars)) + main_aliases = ", ".join( + map(lambda v: v.short_name, standard_index.input_variables) + ) return InvalidIcclimArgumentError( - f"Index {index.short_name} needs the following variable(s)" - f" [{main_aliases}], some of them were not recognized in the input." - f" Use `var_name` parameter to explicitly use data variable names" - f" from your input dataset: {list(ds.data_vars)}." + f"Index {standard_index.short_name} needs the following variable(s)" + f" [{main_aliases}], but the input variables were {list(ds.data_vars)}." + f" Use `var_name` parameter to explicitly set variable names." 
) - index_expected_vars = index.input_variables - if len(ds.data_vars) == 1: - if len(index_expected_vars) != 1: + if standard_index is not None: + if len(ds.data_vars) == 1: + if len(standard_index.input_variables) != 1: + raise get_error() + return [get_name_of_first_var(ds)] + climate_var_names = [] + for expected_standard_var in standard_index.input_variables: + for alias in expected_standard_var.aliases: + # check if dataset contains this alias + if _is_alias_valid(ds, standard_index, alias): + climate_var_names.append(alias) + break + if len(climate_var_names) < len(standard_index.input_variables): raise get_error() - return [_get_name_of_first_var(ds)] - climate_var_names = [] - for indice_var in index_expected_vars: - for alias in indice_var: - # check if dataset contains this alias - if _is_alias_valid(ds, index, alias): - climate_var_names.append(alias) - break - if len(climate_var_names) < len(index_expected_vars): - raise get_error() - return climate_var_names + return climate_var_names + else: + if len(ds.data_vars) == 1: + return [get_name_of_first_var(ds)] + else: + return _find_standard_vars(ds) -def _has_percentile_variable(ds: Dataset, name: str) -> bool: - # fixme: Not the best to use a string (the name) to identify percentiles data - return f"{name}_thresholds" in ds.data_vars +def _find_standard_vars(ds: Dataset) -> list[Hashable]: + return [ + v + for v in ds.data_vars + if StandardVariableRegistry.lookup(str(v), no_error=True) is not None + ] -def _build_cf_variable( - ds: Dataset, - name: str, - time_range: list[str] | None, +def guess_input_type(data: DataArray) -> StandardVariable | None: + cf_input = StandardVariableRegistry.lookup(str(data.name), no_error=True) + if cf_input is None and data.attrs.get("standard_name", None) is not None: + cf_input = StandardVariableRegistry.lookup( + data.attrs.get("standard_name"), no_error=True + ) + if cf_input is None: + return None + return cf_input + + +def build_studied_data( + original_da: DataArray, + time_range: Sequence[str] | None, ignore_Feb29th: bool, - base_period_time_range: list[str] | None, - only_leap_years: bool, - time_clipping: Callable, -) -> CfVariable: - if len(ds.data_vars) == 1: - da = ds[_get_name_of_first_var(ds)] - else: - da = ds[name] - study_da = _build_study_da(da, time_range, ignore_Feb29th) - if _has_percentile_variable(ds, name): - if base_period_time_range is not None: - raise InvalidIcclimArgumentError( - "Cannot determine the data to use for percentiles when both" - " `base_period_time_range` and an in_files `thresholds` are given." - " Please fill only one of the two." - ) - reference_da = PercentileDataArray.from_da(ds[f"{name}_thresholds"]) - elif base_period_time_range is not None: - reference_da = _build_reference_da(da, base_period_time_range, only_leap_years) - else: - reference_da = study_da - if time_clipping is not None: - study_da = time_clipping(study_da) - reference_da = time_clipping(reference_da) - # TODO: all these pre-processing operations should probably be added in history - # metadata or - # provenance it could be a property in CfVariable which will be reused when we - # update the metadata of the index, at the end. 
- # We could have a singleton "taking notes" of each operation that must be - # logged into the output netcdf/provenance/metadata - study_da = study_da.chunk("auto") - reference_da = reference_da.chunk("auto") - return CfVariable(name, study_da, reference_da) - - -def _build_study_da( - original_da: DataArray, time_range: list[str] | None, ignore_Feb29th: bool + standard_var: StandardVariable | None, ) -> DataArray: if time_range is not None: - _check_time_range_pre_validity("time_range", time_range) + check_time_range_pre_validity("time_range", time_range) time_range = [get_date_to_iso_format(x) for x in time_range] da = original_da.sel(time=slice(time_range[0], time_range[1])) - _check_time_range_post_validity(da, original_da, "time_range", time_range) + check_time_range_post_validity(da, original_da, "time_range", time_range) if len(da.time) == 0: raise InvalidIcclimArgumentError( f"The given `time_range` {time_range} " @@ -325,39 +227,14 @@ def _build_study_da( else: da = original_da if ignore_Feb29th: - da = xclim.core.calendar.convert_calendar(da, CfCalendar.NO_LEAP.get_name()) - return da - - -def _build_reference_da( - original_da: DataArray, - base_time_range: list[str], - only_leap_years: bool, -) -> DataArray: - _check_time_range_pre_validity("base_period_time_range", base_time_range) - base_time_range = [get_date_to_iso_format(x) for x in base_time_range] - da = original_da.sel(time=slice(base_time_range[0], base_time_range[1])) - _check_time_range_post_validity( - da, original_da, "base_period_time_range", base_time_range - ) - if only_leap_years: - da = _reduce_only_leap_years(original_da) + da = xclim.core.calendar.convert_calendar(da, CfCalendarRegistry.NO_LEAP.name) + if da.attrs.get(UNITS_ATTRIBUTE_KEY, None) is None and standard_var is not None: + da.attrs[UNITS_ATTRIBUTE_KEY] = standard_var.default_units + da = da.chunk("auto") return da -def _reduce_only_leap_years(da: DataArray) -> DataArray: - reduced_list = [] - for _, val in da.groupby(da.time.dt.year): - if val.time.dt.dayofyear.max() == 366: - reduced_list.append(val) - if not reduced_list: - raise InvalidIcclimArgumentError( - "No leap year in current dataset. Do not use `only_leap_years` parameter." 
- ) - return xr.concat(reduced_list, "time") - - -def _check_time_range_pre_validity(key: str, tr: list) -> None: +def check_time_range_pre_validity(key: str, tr: Sequence[datetime | str]) -> None: if len(tr) != 2: raise InvalidIcclimArgumentError( f"The given `{key}` {tr}" @@ -366,7 +243,7 @@ def _check_time_range_pre_validity(key: str, tr: list) -> None: ) -def _check_time_range_post_validity(da, original_da, key: str, tr: list) -> None: +def check_time_range_post_validity(da, original_da, key: str, tr: list) -> None: if len(da.time) == 0: raise InvalidIcclimArgumentError( f"The given `{key}` {tr} is out of the sample time bounds:" @@ -380,7 +257,7 @@ def _is_alias_valid(ds, index, alias): def _has_valid_unit(group: IndexGroup, da: DataArray) -> bool: - if group == IndexGroup.SNOW: + if group == IndexGroupRegistry.SNOW: try: # todo: unit check might be replaced by cf-xarray xclim.core.units.check_units.__wrapped__(da, "[length]") @@ -390,33 +267,88 @@ def _has_valid_unit(group: IndexGroup, da: DataArray) -> bool: return True -def _get_percentile_var_name( - per_ds: Dataset, in_dict: InFileDictionary, climate_var_name: str -) -> str: - if per_var_name := in_dict.get("per_var_name", None): - return per_var_name - elif len(per_ds.data_vars) == 1: - return _get_name_of_first_var(per_ds) - else: - return _guess_per_var_name(climate_var_name, per_ds) - - -def _guess_per_var_name(climate_var_name: str, per_ds: Dataset) -> str: - data_var_names = map(lambda v: str(v.name), per_ds.data_vars) - for x in data_var_names: - if climate_var_name in x: - return x - raise InvalidIcclimArgumentError( - "Could not guess the variable name for percentiles" - f" of {climate_var_name}. Please, provide the" - f" explicite name using per_var_name like so:" - f" \u007bf'{climate_var_name}':" - f" \u007b'study': 'x.nc'," - f" 'percentiles': 'y.nc'," - f" per_var_name='{climate_var_name}_percentiles'" - f" \u007d\u007d" - ) - - -def _get_name_of_first_var(ds: Dataset) -> str: +def get_name_of_first_var(ds: Dataset) -> str: return str(ds.data_vars[list(ds.data_vars.keys())[0]].name) + + +def is_dataset_path(query: Sequence | str) -> bool: + if isinstance(query, (tuple, list)): + return all(map(lambda q: is_netcdf_path(q), query)) + return is_zarr_path(query) or is_glob_path(query) or is_netcdf_path(query) + + +def reduce_only_leap_years(da: DataArray) -> DataArray: + reduced_list = [] + for _, val in da.groupby(da.time.dt.year): + if val.time.dt.dayofyear.max() == 366: + reduced_list.append(val) + if not reduced_list: + raise InvalidIcclimArgumentError( + "No leap year in current dataset. Do not use `only_leap_years` parameter." 
+ ) + return xr.concat(reduced_list, "time") + + +def read_string_threshold(query: str) -> tuple[str, str, float]: + value = re.findall(r"-?\d+\.?\d*", query)[0] + value_index = query.find(value) + operator = query[0:value_index].strip() + if query.endswith(value): + unit = None + else: + unit = query[value_index + len(value) :].strip() + return operator, unit, float(value) + + +def read_threshold_DataArray( + thresh_da: DataArray, + threshold_min_value: str | float, + climatology_bounds: Sequence[str], + unit: str, +): + if PercentileDataArray.is_compatible(thresh_da): + built_value = PercentileDataArray.from_da( + standardize_percentile_dim_name(thresh_da), + read_clim_bounds(climatology_bounds, thresh_da), + ) + built_value.attrs["unit"] = unit + + else: + if threshold_min_value: + if isinstance(threshold_min_value, str): + threshold_min_value = convert_units_to(threshold_min_value, thresh_da) + # todo in prcptot the replacing value (np.nan) needs to be 0 + built_value = thresh_da.where(thresh_da > threshold_min_value, np.nan) + else: + built_value = thresh_da + built_value.attrs["unit"] = unit + return built_value + + +def build_reference_da( + original_da: DataArray, + base_period_time_range: Sequence[datetime | str] | None, + only_leap_years: bool, + percentile_min_value: str | float | None, +) -> DataArray: + # todo [refacto] move back to threshold ? + reference = original_da + if base_period_time_range: + check_time_range_pre_validity("base_period_time_range", base_period_time_range) + base_period_time_range = [ + get_date_to_iso_format(x) for x in base_period_time_range + ] + reference = original_da.sel( + time=slice(base_period_time_range[0], base_period_time_range[1]) + ) + check_time_range_post_validity( + reference, original_da, "base_period_time_range", base_period_time_range + ) + if only_leap_years: + reference = reduce_only_leap_years(original_da) + if percentile_min_value: + if isinstance(percentile_min_value, str): + percentile_min_value = convert_units_to(percentile_min_value, reference) + # todo in prcptot the replacing value (np.nan) needs to be 0 + reference = reference.where(reference >= percentile_min_value, np.nan) + return reference diff --git a/icclim/pre_processing/rechunk.py b/icclim/pre_processing/rechunk.py index 94febc50..6da392c1 100644 --- a/icclim/pre_processing/rechunk.py +++ b/icclim/pre_processing/rechunk.py @@ -17,7 +17,7 @@ import icclim.utils as utils from icclim.icclim_exceptions import InvalidIcclimArgumentError from icclim.icclim_logger import IcclimLogger -from icclim.pre_processing.input_parsing import is_zarr, read_dataset +from icclim.pre_processing.input_parsing import is_zarr_path, read_dataset TMP_STORE_1 = "icclim-tmp-store-1.zarr" TMP_STORE_2 = "icclim-tmp-store-2.zarr" @@ -145,7 +145,7 @@ def create_optimized_zarr_store( _remove_stores(*stores_to_remove, filesystem=filesystem) -def _remove_stores(*stores, filesystem): +def _remove_stores(*stores, filesystem: AbstractFileSystem): for s in stores: try: filesystem.rm(s, recursive=True, maxdepth=100) @@ -163,8 +163,8 @@ def _unsafe_create_optimized_zarr_store( ): with dask.config.set(DEFAULT_DASK_CONF): logger.info("Rechunking in progress, this will take some time.") - is_ds_zarr = is_zarr(in_files) - ds = read_dataset(in_files, index=None, var_name=var_name) + is_ds_zarr = is_zarr_path(in_files) + ds = read_dataset(in_files, standard_var=None, var_name=var_name) # drop all non essential data variables ds = ds.drop_vars(filter(lambda v: v not in var_name, ds.data_vars.keys())) if 
len(ds.data_vars.keys()) == 0: @@ -192,7 +192,7 @@ def _unsafe_create_optimized_zarr_store( ds[data_var].encoding = {} acc = {} for dim in ds[data_var].dims: - acc.update({dim: utils._get_chunksizes(ds)[dim][0]}) + acc.update({dim: utils.get_chunksizes(ds)[dim][0]}) target_chunks.update({data_var: acc}) for c in ds.coords: ds[c].encoding = {} @@ -216,7 +216,7 @@ def _build_default_chunking(ds: Dataset) -> dict: return chunking -def _is_rechunking_unnecessary(ds, chunking) -> bool: +def _is_rechunking_unnecessary(ds: Dataset, chunking: dict[str, int] | None) -> bool: cp = copy.deepcopy(ds.chunks) if chunking is None: return len(ds.chunks["time"]) == 1 diff --git a/icclim/tests/test_calc_operation.py b/icclim/tests/test_calc_operation.py deleted file mode 100644 index ed8d8798..00000000 --- a/icclim/tests/test_calc_operation.py +++ /dev/null @@ -1,197 +0,0 @@ -from __future__ import annotations - -from typing import Callable -from unittest.mock import MagicMock, patch - -import pytest - -from icclim.icclim_exceptions import InvalidIcclimArgumentError -from icclim.models.constants import PRECIPITATION, TEMPERATURE -from icclim.models.frequency import Frequency -from icclim.models.index_config import CfVariable -from icclim.models.user_index_config import LogicalOperation -from icclim.tests.testing_utils import stub_pr, stub_tas, stub_user_index -from icclim.user_indices import calc_operation -from icclim.user_indices.calc_operation import ( - CalcOperation, - anomaly, - compute_user_index, - count_events, - max_consecutive_event_count, - run_mean, - run_sum, -) - - -class Test_compute: - def test_error_bad_operation(self): - # GIVEN - cf_var = CfVariable("tas", stub_tas(), stub_tas()) - user_index = stub_user_index([cf_var]) - user_index.calc_operation = "pouet pouet" - user_index.frequency = Frequency.MONTH - # WHEN - with pytest.raises(InvalidIcclimArgumentError): - compute_user_index(user_index) - - def test_simple(self): - # GIVEN - cf_var = CfVariable("tas", stub_tas(), stub_tas()) - user_index = stub_user_index([cf_var]) - user_index.calc_operation = "max" - user_index.frequency = Frequency.MONTH - # WHEN - result = compute_user_index(user_index) - # THEN - assert result.data[0] == 1 - - def test_simple_percentile_pr(self): - # GIVEN - cf_var = CfVariable("tas", stub_pr(5), stub_pr(5)) - cf_var.study_da.data[15:30] += 10 - cf_var.study_da.data[366 + 15 : 366 + 30] = 2 # Ignore because not in base - cf_var.reference_da = cf_var.study_da.sel( - time=cf_var.study_da.time.dt.year == 2042 - ) - user_index = stub_user_index([cf_var]) - user_index.calc_operation = CalcOperation.MIN - user_index.thresh = "90p" - user_index.logical_operation = LogicalOperation.GREATER_OR_EQUAL_THAN - user_index.var_type = PRECIPITATION - user_index.frequency = Frequency.YEAR - # WHEN - result = compute_user_index(user_index) - # THEN - assert result.data[0] == 5 - - def test_simple_percentile_temp(self): - cf_var = CfVariable("tas", stub_tas(5), stub_tas(5)) - cf_var.study_da.data[15:30] = 1 - cf_var.reference_da = cf_var.study_da.sel( - time=cf_var.study_da.time.dt.year.isin([2042, 2043]) - ) - user_index = stub_user_index([cf_var]) - user_index.calc_operation = "min" - user_index.thresh = "10p" - user_index.logical_operation = LogicalOperation.LOWER_OR_EQUAL_THAN - user_index.var_type = TEMPERATURE - user_index.frequency = Frequency.MONTH - # WHEN - result = compute_user_index(user_index) - # THEN - assert result.data[0] == 1 - assert result.data[1] == 5 - - 
@patch("icclim.models.user_index_config.UserIndexConfig") - @patch("icclim.models.index_config.CfVariable") - def test_error_anomaly(self, config_mock: MagicMock, cf_var_mock: MagicMock): - config_mock.cf_vars = [cf_var_mock] - cf_var_mock.reference_da = None - with pytest.raises(InvalidIcclimArgumentError): - anomaly(config_mock) - - @patch("icclim.models.user_index_config.UserIndexConfig") - @patch("icclim.user_indices.operators.anomaly") - @patch("icclim.models.index_config.CfVariable") - def test_success_anomaly( - self, config_mock: MagicMock, op_mock: MagicMock, cf_var_mock: MagicMock - ): - config_mock.cf_vars = [cf_var_mock] - cf_var_mock.reference_da = [1, 2, 3] # no-op, just need to mock a valid length - anomaly(config_mock) - op_mock.assert_called_once() - - @patch("icclim.models.user_index_config.UserIndexConfig") - def test_error_run_sum(self, config_mock: MagicMock): - config_mock.extreme_mode = None - with pytest.raises(InvalidIcclimArgumentError): - run_sum(config_mock) - config_mock.extreme_mode = {} - config_mock.window_width = None - with pytest.raises(InvalidIcclimArgumentError): - run_sum(config_mock) - - @patch("icclim.user_indices.operators.run_sum") - @patch("icclim.models.user_index_config.UserIndexConfig") - def test_success_run_sum(self, config_mock: MagicMock, op_mock: MagicMock): - run_sum(config_mock) - op_mock.assert_called_once() - - @patch("icclim.models.user_index_config.UserIndexConfig") - def test_error_run_mean(self, config_mock: MagicMock): - config_mock.extreme_mode = None - with pytest.raises(InvalidIcclimArgumentError): - run_mean(config_mock) - config_mock.extreme_mode = {} - config_mock.window_width = None - with pytest.raises(InvalidIcclimArgumentError): - run_mean(config_mock) - - @patch("icclim.user_indices.operators.run_mean") - @patch("icclim.models.user_index_config.UserIndexConfig") - def test_success_run_mean(self, config_mock: MagicMock, op_mock: MagicMock): - run_mean(config_mock) - op_mock.assert_called_once() - - @patch("icclim.models.user_index_config.UserIndexConfig") - def test_error_max_consecutive_event_count(self, config_mock: MagicMock): - config_mock.logical_operation = None - with pytest.raises(InvalidIcclimArgumentError): - max_consecutive_event_count(config_mock) - config_mock.logical_operation = {} - config_mock.thresh = None - with pytest.raises(InvalidIcclimArgumentError): - max_consecutive_event_count(config_mock) - config_mock.logical_operation = {} - config_mock.thresh = [] - with pytest.raises(InvalidIcclimArgumentError): - max_consecutive_event_count(config_mock) - - @patch("icclim.user_indices.operators.max_consecutive_event_count") - @patch("icclim.models.user_index_config.UserIndexConfig") - def test_success_max_consecutive_event_count( - self, config_mock: MagicMock, op_mock: MagicMock - ): - max_consecutive_event_count(config_mock) - op_mock.assert_called_once() - - @patch("icclim.models.user_index_config.UserIndexConfig") - def test_error_count_events(self, config_mock: MagicMock): - config_mock.nb_event_config = None - with pytest.raises(InvalidIcclimArgumentError): - count_events(config_mock) - - @patch("icclim.user_indices.operators.count_events") - @patch("icclim.models.user_index_config.UserIndexConfig") - def test_success_count_events(self, config_mock: MagicMock, op_mock: MagicMock): - count_events(config_mock) - op_mock.assert_called_once() - - @pytest.mark.parametrize( - "reducer", - [ - calc_operation.sum, - calc_operation.mean, - calc_operation.min, - calc_operation.max, - ], - ) - 
@patch("icclim.models.user_index_config.UserIndexConfig") - def test_error_simple_reducer(self, config_mock: MagicMock, reducer: Callable): - config_mock.cf_vars = [1, 2, 3] - with pytest.raises(InvalidIcclimArgumentError): - reducer(config_mock) - config_mock.cf_vars = [MagicMock()] - config_mock.thresh = [] - with pytest.raises(InvalidIcclimArgumentError): - reducer(config_mock) - - @pytest.mark.parametrize("reducer", ["sum", "mean", "min", "max"]) - @patch("icclim.models.user_index_config.UserIndexConfig") - def test_success_simple_reducer(self, config_mock: MagicMock, reducer: str): - config_mock.calc_operation = reducer - config_mock.cf_vars = [MagicMock()] - config_mock.thresh = 42 - with patch("icclim.user_indices.operators." + reducer) as op_mock: - compute_user_index(config_mock) - op_mock.assert_called_once() diff --git a/icclim/tests/test_cf_calendar.py b/icclim/tests/test_cf_calendar.py index 1abdd3b6..be1e0340 100644 --- a/icclim/tests/test_cf_calendar.py +++ b/icclim/tests/test_cf_calendar.py @@ -5,13 +5,14 @@ import pytest import xarray as xr -from icclim.models.cf_calendar import CfCalendar +from icclim.icclim_exceptions import InvalidIcclimArgumentError +from icclim.models.cf_calendar import CfCalendarRegistry class Test_CfCalendar: def test_error_lookup(self): - with pytest.raises(TypeError): - CfCalendar.lookup("NOPE!") + with pytest.raises(InvalidIcclimArgumentError): + CfCalendarRegistry.lookup("NOPE!") @pytest.mark.parametrize( "cal", @@ -26,44 +27,44 @@ def test_error_lookup(self): ], ) def test_success_lookup(self, cal): - assert CfCalendar.lookup(cal).aliases[0] == cal + assert CfCalendarRegistry.lookup(cal).aliases[0] == cal def test_NO_LEAP(self): da = xr.DataArray(pd.date_range("2000", periods=100, freq="YS"), dims=["time"]) - res = CfCalendar.NO_LEAP.is_leap(da) + res = CfCalendarRegistry.NO_LEAP.is_leap(da) np.testing.assert_array_equal(False, res) def test_DAYS_360(self): da = xr.DataArray(pd.date_range("2000", periods=100, freq="YS"), dims=["time"]) - res = CfCalendar.DAYS_360.is_leap(da) + res = CfCalendarRegistry.DAYS_360.is_leap(da) np.testing.assert_array_equal(False, res) def test_ALL_LEAP(self): da = xr.DataArray(pd.date_range("2000", periods=100, freq="YS"), dims=["time"]) - res = CfCalendar.ALL_LEAP.is_leap(da) + res = CfCalendarRegistry.ALL_LEAP.is_leap(da) np.testing.assert_array_equal(True, res) def test_PROLEPTIC_GREGORIAN(self): - res_1 = CfCalendar.PROLEPTIC_GREGORIAN.is_leap( + res_1 = CfCalendarRegistry.PROLEPTIC_GREGORIAN.is_leap( xr.DataArray(np.asarray([40, 1600])) ) - res_2 = CfCalendar.PROLEPTIC_GREGORIAN.is_leap( + res_2 = CfCalendarRegistry.PROLEPTIC_GREGORIAN.is_leap( xr.DataArray(np.asarray([42, 1500, 1700])) ) np.testing.assert_array_equal(True, res_1) np.testing.assert_array_equal(False, res_2) def test_JULIAN(self): - res_1 = CfCalendar.JULIAN.is_leap( + res_1 = CfCalendarRegistry.JULIAN.is_leap( xr.DataArray(np.asarray([40, 1500, 1600, 1700])) ) - res_2 = CfCalendar.JULIAN.is_leap(xr.DataArray(np.asarray([42]))) + res_2 = CfCalendarRegistry.JULIAN.is_leap(xr.DataArray(np.asarray([42]))) np.testing.assert_array_equal(True, res_1) np.testing.assert_array_equal(False, res_2) @pytest.mark.parametrize( "cal", - [CfCalendar.STANDARD, CfCalendar.NONE], + [CfCalendarRegistry.STANDARD, CfCalendarRegistry.NONE], ) def test_STANDARD(self, cal): res_1 = cal.is_leap(xr.DataArray(np.asarray([40, 1500, 1600]))) diff --git a/icclim/tests/test_ecad_indices.py b/icclim/tests/test_ecad_indices.py index 24932bcc..73049b6c 100644 --- 
a/icclim/tests/test_ecad_indices.py +++ b/icclim/tests/test_ecad_indices.py @@ -1,406 +1,25 @@ from __future__ import annotations -import numpy as np import pytest -from icclim.ecad.ecad_functions import ( - cfd, - csdi, - csu, - fd, - gd4, - hd17, - prcptot, - su, - tn10p, - tr, - tx90p, - wsdi, -) -from icclim.ecad.ecad_indices import EcadIndex +from icclim.ecad.ecad_indices import EcadIndexRegistry from icclim.icclim_exceptions import InvalidIcclimArgumentError -from icclim.models.frequency import Frequency -from icclim.models.index_config import CfVariable, IndexConfig -from icclim.models.netcdf_version import NetcdfVersion -from icclim.models.quantile_interpolation import QuantileInterpolation -from icclim.tests.testing_utils import K2C, stub_pr, stub_tas def test_listing(): - res = EcadIndex.list() - assert len(res) == len(EcadIndex) + res = EcadIndexRegistry.list() + assert len(res) == 49 class Test_index_from_string: def test_simple(self): - res = EcadIndex.lookup("SU") - assert res == EcadIndex.SU + res = EcadIndexRegistry.lookup("SU") + assert res == EcadIndexRegistry.SU def test_lowercase(self): - res = EcadIndex.lookup("tx90p") - assert res == EcadIndex.TX90P + res = EcadIndexRegistry.lookup("tx90p") + assert res == EcadIndexRegistry.TX90P def test_error(self): with pytest.raises(InvalidIcclimArgumentError): - EcadIndex.lookup("cacahuête") - - -@pytest.mark.parametrize("use_dask", [True, False]) -def test_tn10p(use_dask): - tas = stub_tas(use_dask=use_dask) - conf = IndexConfig( - frequency=Frequency.MONTH, - cf_variables=[CfVariable("tas", tas, tas)], - netcdf_version=NetcdfVersion.NETCDF4, - window_width=2, - interpolation=QuantileInterpolation.MEDIAN_UNBIASED, - save_percentile=True, - index=EcadIndex.TN10P.climate_index, - ) - res = tn10p(conf) - assert res is not None - - -class Test_SU: - @pytest.mark.parametrize("use_dask", [True, False]) - def test_su_default_threshold(self, use_dask): - tas = stub_tas(tas_value=26 + K2C, use_dask=use_dask) - tas[:5] = 0 - conf = IndexConfig( - frequency=Frequency.MONTH, - cf_variables=[CfVariable("tas", tas)], - netcdf_version=NetcdfVersion.NETCDF4, - index=EcadIndex.SU.climate_index, - ) - res = su(conf) - assert res is not None - assert res[0][0] == 26 # January - - @pytest.mark.parametrize("use_dask", [True, False]) - def test_su_custom_threshold(self, use_dask): - tas = stub_tas(use_dask=use_dask) - tas[:5] = 50 + K2C - conf = IndexConfig( - cf_variables=[CfVariable("tas", tas)], - frequency=Frequency.MONTH, - threshold=40, - netcdf_version=NetcdfVersion.NETCDF4, - index=EcadIndex.SU.climate_index, - ) - res = su(conf) - assert res is not None - assert res[0][0] == 5 # January - - -class Test_TR: - @pytest.mark.parametrize("use_dask", [True, False]) - def test_default_threshold(self, use_dask): - tas = stub_tas(tas_value=26 + K2C, use_dask=use_dask) - tas[:5] = 0 - conf = IndexConfig( - cf_variables=[CfVariable("tas", tas)], - frequency=Frequency.MONTH, - netcdf_version=NetcdfVersion.NETCDF4, - index=EcadIndex.TR.climate_index, - ) - res = tr(conf) - assert res is not None - assert res[0][0] == 26 # January - - @pytest.mark.parametrize("use_dask", [True, False]) - def test_custom_threshold(self, use_dask): - tas = stub_tas(use_dask=use_dask) - tas[:5] = 50 + K2C - conf = IndexConfig( - cf_variables=[CfVariable("tas", tas)], - frequency=Frequency.MONTH, - threshold=40, - netcdf_version=NetcdfVersion.NETCDF4, - index=EcadIndex.TR.climate_index, - ) - res = tr(conf) - assert res is not None - assert res[0][0] == 5 # January - - -class 
Test_prcptot: - @pytest.mark.parametrize("use_dask", [True, False]) - def test_default_threshold(self, use_dask): - pr = stub_pr(value=2, use_dask=use_dask) - pr[:10] = 0 - conf = IndexConfig( - frequency=Frequency.MONTH, - cf_variables=[CfVariable("pr", pr)], - netcdf_version=NetcdfVersion.NETCDF4, - index=EcadIndex.PRCPTOT.climate_index, - ) - res = prcptot(conf) - assert res is not None - np.testing.assert_almost_equal(res[0][0], 42.0, 14) - - -class Test_csu: - @pytest.mark.parametrize("use_dask", [True, False]) - def test_default_threshold(self, use_dask): - tas = stub_tas(tas_value=26 + K2C, use_dask=use_dask) - tas[10:15] = 0 - conf = IndexConfig( - cf_variables=[CfVariable("tas", tas)], - frequency=Frequency.MONTH, - netcdf_version=NetcdfVersion.NETCDF4, - index=EcadIndex.CSU.climate_index, - ) - res = csu(conf) - assert res is not None - assert res[0][0] == 16 # January - - @pytest.mark.parametrize("use_dask", [True, False]) - def test_custom_threshold(self, use_dask): - tas = stub_tas(use_dask=use_dask) - tas[:5] = 50 + K2C - tas[10:20] = 50 + K2C - conf = IndexConfig( - cf_variables=[CfVariable("tas", tas)], - frequency=Frequency.MONTH, - threshold=40, - netcdf_version=NetcdfVersion.NETCDF4, - index=EcadIndex.CSU.climate_index, - ) - res = csu(conf) - assert res is not None - assert res[0][0] == 10 # January - - -class Test_gd4: - @pytest.mark.parametrize("use_dask", [True, False]) - def test_default_threshold(self, use_dask): - tas = stub_tas(tas_value=26 + K2C, use_dask=use_dask) - tas[5:15] = 0 - conf = IndexConfig( - cf_variables=[CfVariable("tas", tas)], - frequency=Frequency.MONTH, - netcdf_version=NetcdfVersion.NETCDF4, - index=EcadIndex.GD4.climate_index, - ) - res = gd4(conf) - assert res is not None - expected = (26 - 4) * 21 - assert res[0][0] == expected # 21 days in January above 4 degC (at 26degC) - - @pytest.mark.parametrize("use_dask", [True, False]) - def test_custom_threshold(self, use_dask): - tas = stub_tas(tas_value=26 + K2C, use_dask=use_dask) - tas[5:15] = 0 - conf = IndexConfig( - cf_variables=[CfVariable("tas", tas)], - frequency=Frequency.MONTH, - threshold=5, - netcdf_version=NetcdfVersion.NETCDF4, - index=EcadIndex.GD4.climate_index, - ) - res = gd4(conf) - assert res is not None - expected = (26 - 5) * 21 - assert res[0][0] == expected # 21 days in January above 4 degC (at 26degC) - - -class Test_cfd: - @pytest.mark.parametrize("use_dask", [True, False]) - def test_default_threshold(self, use_dask): - tas = stub_tas(tas_value=26 + K2C, use_dask=use_dask) - tas[5:15] = 0 - conf = IndexConfig( - cf_variables=[CfVariable("tas", tas)], - frequency=Frequency.MONTH, - netcdf_version=NetcdfVersion.NETCDF4, - index=EcadIndex.CFD.climate_index, - ) - res = cfd(conf) - assert res is not None - assert res[0][0] == 10 - - @pytest.mark.parametrize("use_dask", [True, False]) - def test_custom_threshold(self, use_dask): - tas = stub_tas(tas_value=26 + K2C, use_dask=use_dask) - tas[5:10] = 0 - tas[10:15] = 4 - conf = IndexConfig( - cf_variables=[CfVariable("tas", tas)], - frequency=Frequency.MONTH, - threshold=5, - netcdf_version=NetcdfVersion.NETCDF4, - index=EcadIndex.CFD.climate_index, - ) - res = cfd(conf) - assert res is not None - assert res[0][0] == 10 - - -class Test_fd: - @pytest.mark.parametrize("use_dask", [True, False]) - def test_default_threshold(self, use_dask): - tas = stub_tas(tas_value=26 + K2C, use_dask=use_dask) - tas[5:15] = 0 - tas[20:25] = 0 - conf = IndexConfig( - cf_variables=[CfVariable("tas", tas)], - frequency=Frequency.MONTH, - 
netcdf_version=NetcdfVersion.NETCDF4, - index=EcadIndex.FD.climate_index, - ) - res = fd(conf) - assert res is not None - assert res[0][0] == 15 - - @pytest.mark.parametrize("use_dask", [True, False]) - def test_custom_threshold(self, use_dask): - tas = stub_tas(tas_value=26 + K2C, use_dask=use_dask) - tas[5:10] = 0 - tas[10:15] = 4 - conf = IndexConfig( - cf_variables=[CfVariable("tas", tas)], - frequency=Frequency.MONTH, - threshold=5, - netcdf_version=NetcdfVersion.NETCDF4, - index=EcadIndex.FD.climate_index, - ) - res = fd(conf) - assert res is not None - assert res[0][0] == 10 - - -class Test_hd17: - @pytest.mark.parametrize("use_dask", [True, False]) - def test_default_threshold(self, use_dask): - tas = stub_tas(tas_value=27 + K2C, use_dask=use_dask) - tas[5:10] = 0 - conf = IndexConfig( - cf_variables=[CfVariable("tas", tas)], - frequency=Frequency.MONTH, - netcdf_version=NetcdfVersion.NETCDF4, - index=EcadIndex.HD17.climate_index, - ) - res = hd17(conf) - assert res is not None - assert res[0][0] == 5 * (17 + K2C) - - @pytest.mark.parametrize("use_dask", [True, False]) - def test_custom_threshold(self, use_dask): - tas = stub_tas(tas_value=27 + K2C, use_dask=use_dask) - tas[5:10] = 0 - conf = IndexConfig( - cf_variables=[CfVariable("tas", tas)], - frequency=Frequency.MONTH, - threshold=5, - netcdf_version=NetcdfVersion.NETCDF4, - index=EcadIndex.HD17.climate_index, - ) - res = hd17(conf) - assert res is not None - assert res[0][0] == 5 * (5 + K2C) - - -class TestTx90p: - @pytest.mark.parametrize("use_dask", [True, False]) - def test_no_bootstrap_no_overlap(self, use_dask): - tas = stub_tas(tas_value=27 + K2C, use_dask=use_dask) - tas[5:10] = 0 - base_tas = tas.sel(time=slice("2042-01-01", "2042-12-31")) - tas = tas.sel(time=slice("2042-01-01", "2045-12-31")) - conf = IndexConfig( - cf_variables=[CfVariable("tas", tas, base_tas)], - frequency=Frequency.MONTH, - netcdf_version=NetcdfVersion.NETCDF4, - index=EcadIndex.TX90P.climate_index, - ) - res, _ = tx90p(conf) - assert "reference_epoch" not in res.attrs.keys() - - @pytest.mark.parametrize("use_dask", [True, False]) - def test_no_bootstrap_1_year_base(self, use_dask): - tas = stub_tas(tas_value=27 + K2C, use_dask=use_dask) - base_tas = tas.sel( - time=slice("2042-01-01", "2042-12-31"), - ) - tas = tas.sel(time=slice("2042-01-01", "2045-12-31")) - conf = IndexConfig( - cf_variables=[CfVariable("tas", tas, base_tas)], - frequency=Frequency.MONTH, - netcdf_version=NetcdfVersion.NETCDF4, - index=EcadIndex.TX90P.climate_index, - ) - res, _ = tx90p(conf) - assert "reference_epoch" not in res.attrs.keys() - - @pytest.mark.parametrize("use_dask", [True, False]) - def test_bootstrap_2_years(self, use_dask): - tas = stub_tas(tas_value=27 + K2C, use_dask=use_dask) - base_tas = tas.sel( - time=slice("2042-01-01", "2043-12-31"), - ) - tas = tas.sel(time=slice("2042-01-01", "2045-12-31")) - conf = IndexConfig( - cf_variables=[CfVariable("tas", tas, base_tas)], - frequency=Frequency.MONTH, - netcdf_version=NetcdfVersion.NETCDF4, - index=EcadIndex.TX90P.climate_index, - ) - res, _ = tx90p(conf) - assert res.attrs["reference_epoch"] == ["2042-01-01", "2043-12-31"] - - -class TestWsdi: - @pytest.mark.parametrize("use_dask", [True, False]) - def test_wsdi_bootstrap_2_years(self, use_dask): - tas = stub_tas(tas_value=27 + K2C, use_dask=use_dask) - base_tas = tas.sel( - time=slice("2042-01-01", "2043-12-31"), - ) - tas = tas.sel(time=slice("2042-01-01", "2045-12-31")) - conf = IndexConfig( - cf_variables=[CfVariable("tas", tas, base_tas)], - 
frequency=Frequency.MONTH, - netcdf_version=NetcdfVersion.NETCDF4, - index=EcadIndex.TX90P.climate_index, - ) - res, _ = wsdi(conf) - assert res.attrs["reference_epoch"] == ["2042-01-01", "2043-12-31"] - - -class TestCsdi: - @pytest.mark.parametrize("use_dask", [True, False]) - def test_csdi_bootstrap_2_years(self, use_dask): - tas = stub_tas(tas_value=27 + K2C, use_dask=use_dask) - base_tas = tas.sel( - time=slice("2042-01-01", "2043-12-31"), - ) - tas = tas.sel(time=slice("2042-01-01", "2045-12-31")) - conf = IndexConfig( - cf_variables=[CfVariable("tas", tas, base_tas)], - frequency=Frequency.MONTH, - netcdf_version=NetcdfVersion.NETCDF4, - index=EcadIndex.TX90P.climate_index, - save_percentile=True, - ) - res, per = csdi(conf) - assert res.attrs["reference_epoch"] == ["2042-01-01", "2043-12-31"] - assert per.percentiles.values[0] == 10 - - @pytest.mark.parametrize("use_dask", [True, False]) - def test_csdi_custom_thresh(self, use_dask): - tas = stub_tas(tas_value=27 + K2C, use_dask=use_dask) - base_tas = tas.sel( - time=slice("2042-01-01", "2043-12-31"), - ) - tas = tas.sel(time=slice("2042-01-01", "2045-12-31")) - conf = IndexConfig( - cf_variables=[CfVariable("tas", tas, base_tas)], - frequency=Frequency.MONTH, - netcdf_version=NetcdfVersion.NETCDF4, - index=EcadIndex.TX90P.climate_index, - threshold=5, - save_percentile=True, - ) - res, per = csdi(conf) - assert res.attrs["reference_epoch"] == ["2042-01-01", "2043-12-31"] - assert per.percentiles.values[0] == 5 + EcadIndexRegistry.lookup("cacahuête") diff --git a/icclim/tests/test_frequency.py b/icclim/tests/test_frequency.py index e1a51320..74b33add 100644 --- a/icclim/tests/test_frequency.py +++ b/icclim/tests/test_frequency.py @@ -6,88 +6,81 @@ import pytest from icclim.icclim_exceptions import InvalidIcclimArgumentError -from icclim.models.frequency import Frequency, get_seasonal_time_updater +from icclim.models.frequency import FrequencyRegistry, get_seasonal_time_updater from icclim.tests.testing_utils import stub_tas class Test_build_frequency_over_frequency: def test_simple(self): - freq = Frequency.lookup(Frequency.YEAR) - assert freq == Frequency.YEAR + freq = FrequencyRegistry.lookup(FrequencyRegistry.YEAR) + assert freq == FrequencyRegistry.YEAR class Test_build_frequency_over_string: def test_error(self): with pytest.raises(InvalidIcclimArgumentError): - Frequency.lookup("yolo") + FrequencyRegistry.lookup("yolo") def test_simple(self): - freq = Frequency.lookup("year") - assert freq == Frequency.YEAR + freq = FrequencyRegistry.lookup("year") + assert freq == FrequencyRegistry.YEAR class Test_build_frequency_over_list: def test_lookup_list__keyword_error(self): with pytest.raises(InvalidIcclimArgumentError): - Frequency.lookup(["cacahuêtes"]) + FrequencyRegistry.lookup(["cacahuêtes"]) def test_lookup_string_error(self): with pytest.raises(InvalidIcclimArgumentError): - Frequency.lookup("cacahuêtes") + FrequencyRegistry.lookup("cacahuêtes") def test_lookup_month(self): - freq = Frequency.lookup(["month", [1, 4, 3]]) - assert freq == Frequency.CUSTOM + freq = FrequencyRegistry.lookup(["month", [1, 4, 3]]) assert freq.pandas_freq == "MS" assert freq.accepted_values == [] assert freq.post_processing is not None def test_lookup_season(self): - freq = Frequency.lookup(["season", [1, 2, 3, 4]]) - assert freq == Frequency.CUSTOM + freq = FrequencyRegistry.lookup(["season", [1, 2, 3, 4]]) assert freq.pandas_freq == "AS-JAN" assert freq.accepted_values == [] assert freq.post_processing is not None def 
test_lookup_season_tuple(self): - freq = Frequency.lookup(("season", [1, 2, 3, 4])) - assert freq == Frequency.CUSTOM + freq = FrequencyRegistry.lookup(("season", [1, 2, 3, 4])) assert freq.pandas_freq == "AS-JAN" assert freq.accepted_values == [] assert freq.post_processing is not None def test_lookup_pandas_freq(self): - freq = Frequency.lookup("3MS") - assert freq == Frequency.CUSTOM + freq = FrequencyRegistry.lookup("3MS") assert freq.pandas_freq == "3MS" assert freq.accepted_values == [] assert freq.post_processing is not None def test_lookup_winter__deprecated_tuple(self): - freq = Frequency.lookup(["season", ([11, 12], [1, 2, 3, 4])]) - assert freq == Frequency.CUSTOM + freq = FrequencyRegistry.lookup(["season", ([11, 12], [1, 2, 3, 4])]) assert freq.pandas_freq == "AS-NOV" assert freq.accepted_values == [] assert freq.post_processing is not None def test_lookup_error__non_consecutive_season(self): with pytest.raises(InvalidIcclimArgumentError): - Frequency.lookup(["season", ([12, 3])]) + FrequencyRegistry.lookup(["season", ([12, 3])]) def test_lookup_error__weird_months(self): with pytest.raises(InvalidIcclimArgumentError): - Frequency.lookup(["season", ([42, 0])]) + FrequencyRegistry.lookup(["season", ([42, 0])]) def test_lookup__winter(self): - freq = Frequency.lookup(["season", [11, 12, 1, 2]]) - assert freq == Frequency.CUSTOM + freq = FrequencyRegistry.lookup(["season", [11, 12, 1, 2]]) assert freq.pandas_freq == "AS-NOV" assert freq.accepted_values == [] assert freq.post_processing is not None def test_lookup_season__between_dates(self): - freq = Frequency.lookup(["season", ["07-19", "08-14"]]) - assert freq == Frequency.CUSTOM + freq = FrequencyRegistry.lookup(["season", ["07-19", "08-14"]]) assert freq.pandas_freq == "AS-JUL" assert freq.accepted_values == [] assert freq.post_processing is not None diff --git a/icclim/tests/test_generated_api.py b/icclim/tests/test_generated_api.py index c2675bc9..06897763 100644 --- a/icclim/tests/test_generated_api.py +++ b/icclim/tests/test_generated_api.py @@ -7,61 +7,68 @@ import pytest import icclim -from icclim.ecad.ecad_indices import EcadIndex -from icclim.icclim_logger import Verbosity -from icclim.models.constants import ( - MODIFIABLE_QUANTILE_WINDOW, - MODIFIABLE_THRESHOLD, - MODIFIABLE_UNIT, - QUANTILE_BASED, -) -from icclim.models.frequency import Frequency -from icclim.models.netcdf_version import NetcdfVersion -from icclim.models.quantile_interpolation import QuantileInterpolation +from icclim.ecad.ecad_indices import EcadIndexRegistry +from icclim.icclim_exceptions import InvalidIcclimArgumentError +from icclim.icclim_logger import VerbosityRegistry +from icclim.models.constants import QUANTILE_BASED +from icclim.models.frequency import FrequencyRegistry +from icclim.models.netcdf_version import NetcdfVersionRegistry +from icclim.models.quantile_interpolation import QuantileInterpolationRegistry +from icclim.models.standard_index import StandardIndex +from icclim.models.threshold import Threshold from icclim.tests.testing_utils import stub_tas -from icclim.user_indices.calc_operation import CalcOperation +from icclim.user_indices.calc_operation import CalcOperation, CalcOperationRegistry DEFAULT_ARGS = dict( in_files="pouet.nc", var_name=None, - slice_mode=Frequency.YEAR, + slice_mode=FrequencyRegistry.YEAR, time_range=None, out_file=None, ignore_Feb29th=False, - netcdf_version=NetcdfVersion.NETCDF4, - logs_verbosity=Verbosity.LOW, + netcdf_version=NetcdfVersionRegistry.NETCDF4, + logs_verbosity=VerbosityRegistry.LOW, + 
date_event=False, ) -def build_expected_args(index): - expected_call_args = {"index_name": index.name} +def build_expected_args(index: StandardIndex): + expected_call_args = {"index_name": index.short_name.upper()} expected_call_args.update(DEFAULT_ARGS) qualifiers = [] if index.qualifiers is None else index.qualifiers - if MODIFIABLE_THRESHOLD in qualifiers: - expected_call_args.update({"threshold": None}) if QUANTILE_BASED in qualifiers: expected_call_args.update( { "base_period_time_range": None, "only_leap_years": False, - "interpolation": QuantileInterpolation.MEDIAN_UNBIASED, - "save_percentile": False, + "interpolation": QuantileInterpolationRegistry.MEDIAN_UNBIASED.name, + "save_thresholds": False, } ) - if MODIFIABLE_QUANTILE_WINDOW in qualifiers: - expected_call_args.update({"window_width": 5}) - if MODIFIABLE_UNIT in qualifiers: - expected_call_args.update({"out_unit": None}) + if index.threshold is not None: + if isinstance(index.threshold, str): + t = Threshold(index.threshold) + elif isinstance(index.threshold, (list, tuple)): + t = [] + for thresh in index.threshold: + if isinstance(thresh, str): + t.append(Threshold(thresh)) + else: + t.append(thresh) + else: + t = index.threshold + expected_call_args.update({"threshold": t}) + expected_call_args.update({"out_unit": index.output_unit}) return expected_call_args @patch("icclim.index") def test_generated_api(generic_index_fun_mock: MagicMock): - for i in EcadIndex: + for i in EcadIndexRegistry.values(): print(i) # GIVEN - api_index_fun = eval(f"icclim.{i.name.lower()}") + api_index_fun = eval(f"icclim.{i.short_name.lower()}") # WHEN api_index_fun(**DEFAULT_ARGS) # THEN @@ -74,16 +81,22 @@ def test_custom_index(index_fun_mock: MagicMock): user_index_args = dict( in_files="pouet_file.nc", var_name=None, - slice_mode=Frequency.YEAR, + slice_mode=FrequencyRegistry.YEAR, time_range=None, out_file=None, base_period_time_range=None, only_leap_years=False, ignore_Feb29th=False, out_unit=None, - netcdf_version=NetcdfVersion.NETCDF4, - save_percentile=False, - logs_verbosity=Verbosity.LOW, + netcdf_version=NetcdfVersionRegistry.NETCDF4, + logs_verbosity=VerbosityRegistry.LOW, + doy_window_width=5, + save_thresholds=False, + date_event=False, + sampling_method="resample", + min_spell_length=6, + rolling_window_width=5, + interpolation="median_unbiased", user_index={ "index_name": "pouet", "calc_operation": "nb_events", @@ -132,20 +145,21 @@ def test_txx__months_slice_mode(): @pytest.mark.parametrize( "operator, expectation_year_1, expectation_year_2", [ - (CalcOperation.MIN, 303.15, 280.15), - (CalcOperation.MAX, 303.15, 280.15), - (CalcOperation.SUM, 303.15, 280.15), # values below 275 are filtered out - (CalcOperation.MEAN, 303.15, 280.15), - (CalcOperation.EVENT_COUNT, 1, 1), - (CalcOperation.MAX_NUMBER_OF_CONSECUTIVE_EVENTS, 1, 1), + (CalcOperationRegistry.MIN, 303.15, 280.15), + (CalcOperationRegistry.MAX, 303.15, 280.15), + (CalcOperationRegistry.SUM, 303.15, 280.15), + # values below 275 are filtered out + (CalcOperationRegistry.MEAN, 303.15, 280.15), + (CalcOperationRegistry.EVENT_COUNT, 1, 1), + (CalcOperationRegistry.MAX_NUMBER_OF_CONSECUTIVE_EVENTS, 1, 1), ], ) def test_custom_index__season_slice_mode( - operator, expectation_year_1, expectation_year_2 + operator: CalcOperation, expectation_year_1, expectation_year_2 ): - tas = stub_tas(2.0) - tas.loc[{"time": "2042-01-01"}] = 303.15 - tas.loc[{"time": "2042-12-01"}] = 280.15 + tas = stub_tas(275.0) + tas.loc[{"time": "2043-01-01"}] = 303.15 + tas.loc[{"time": "2043-12-01"}] 
= 280.15 res = icclim.custom_index( in_files=tas, slice_mode=["season", [12, 1]], @@ -156,23 +170,27 @@ def test_custom_index__season_slice_mode( "logical_operation": "gt", "thresh": 275, }, - ) - np.testing.assert_almost_equal(res.pouet.isel(time=0), expectation_year_1) - np.testing.assert_almost_equal(res.pouet.isel(time=1), expectation_year_2) + ).compute() + # missing values algo applied for first and last years + np.testing.assert_almost_equal(res.pouet.isel(time=0), np.NAN) + np.testing.assert_almost_equal(res.pouet.isel(time=-1), np.NAN) + np.testing.assert_almost_equal(res.pouet.isel(time=1), expectation_year_1) + np.testing.assert_almost_equal(res.pouet.isel(time=2), expectation_year_2) # integration test @pytest.mark.parametrize( "operator, expectation_year_1, expectation_year_2", [ - (CalcOperation.RUN_MEAN, 2, 2), - (CalcOperation.RUN_SUM, 14, 14), + (CalcOperationRegistry.RUN_MEAN, 275.0, 276.0), + (CalcOperationRegistry.RUN_SUM, 1925.0, 1932.0), ], ) def test_custom_index_run_algos__season_slice_mode( operator, expectation_year_1, expectation_year_2 ): - tas = stub_tas(2.0) + tas = stub_tas(275.0) + tas.loc[{"time": "2043-12-01"}] = 282.0 res = icclim.custom_index( in_files=tas, slice_mode=["season", [12, 1]], @@ -184,21 +202,160 @@ def test_custom_index_run_algos__season_slice_mode( "window_width": 7, }, ) - np.testing.assert_almost_equal(res.pouet.isel(time=0), expectation_year_1) - np.testing.assert_almost_equal(res.pouet.isel(time=1), expectation_year_2) + # missing values algo applied for first and last years + np.testing.assert_almost_equal(res.pouet.isel(time=0), np.NAN) + np.testing.assert_almost_equal(res.pouet.isel(time=-1), np.NAN) + np.testing.assert_almost_equal(res.pouet.isel(time=1), expectation_year_1) + np.testing.assert_almost_equal(res.pouet.isel(time=2), expectation_year_2) + + +def test_custom_index_anomaly__error_single_var(): + tas = stub_tas(2.0) + with pytest.raises(InvalidIcclimArgumentError): + # error: it needs 2 vars or 1 var and a ref period + icclim.custom_index( + in_files=tas, + user_index={ + "index_name": "anomaly", + "calc_operation": CalcOperationRegistry.ANOMALY, + }, + ) -def test_custom_index_anomaly__season_slice_mode(): +def test_custom_index_anomaly__error_(): + tas = stub_tas(2.0) + with pytest.raises(InvalidIcclimArgumentError): + # error: Can't resample the reference variable if it is already a + # subsample of the studied variable. 
(need another sampling_method) + icclim.custom_index( + in_files=tas, + slice_mode=["season", [12, 1]], + base_period_time_range=[datetime(2042, 1, 1), datetime(2044, 12, 31)], + user_index={ + "index_name": "anomaly", + "calc_operation": CalcOperationRegistry.ANOMALY, + }, + ) + + +def test_custom_index_anomaly__datetime_ref_period(): tas = stub_tas(2.0) tas.loc[{"time": "2045-01-01"}] = 300 res = icclim.custom_index( in_files=tas, slice_mode=["season", [12, 1]], - var_name="a_name", + base_period_time_range=[datetime(2042, 1, 1), datetime(2044, 12, 31)], + sampling_method="groupby_ref_and_resample_study", user_index={ "index_name": "anomaly", - "calc_operation": CalcOperation.ANOMALY, - "ref_time_range": [datetime(2042, 1, 1), datetime(2044, 12, 31)], + "calc_operation": CalcOperationRegistry.ANOMALY, }, - ) + ).compute() + # missing values algo applied for first and last years + np.testing.assert_almost_equal(res.anomaly.sel(time="2041"), np.NAN) + np.testing.assert_almost_equal(res.anomaly.sel(time="2042"), 0) + np.testing.assert_almost_equal(res.anomaly.sel(time="2043"), 0) + np.testing.assert_almost_equal(res.anomaly.sel(time="2044"), 4.80645161) + np.testing.assert_almost_equal(res.anomaly.sel(time="2045"), 0) + np.testing.assert_almost_equal(res.anomaly.sel(time="2046"), np.NAN) + + +def test_custom_index_anomaly__groupby_and_resample_month(): + tas = stub_tas(2.0) + tas.loc[{"time": "2045-01-01"}] = 300 + res = icclim.custom_index( + in_files=tas, + slice_mode="month", + base_period_time_range=[datetime(2042, 1, 1), datetime(2044, 12, 31)], + sampling_method="groupby_ref_and_resample_study", + user_index={ + "index_name": "anomaly", + "calc_operation": CalcOperationRegistry.ANOMALY, + }, + ).compute() + np.testing.assert_almost_equal(res.anomaly.sel(time="2045-01"), 9.61290323) + + +def test_custom_index_anomaly__groupby_and_resample_year(): + tas = stub_tas(2.0) + tas.loc[{"time": "2045-01-01"}] = 300 + res = icclim.custom_index( + in_files=tas, + slice_mode="year", + base_period_time_range=[datetime(2042, 1, 1), datetime(2044, 12, 31)], + sampling_method="groupby_ref_and_resample_study", + user_index={ + "index_name": "anomaly", + "calc_operation": CalcOperationRegistry.ANOMALY, + }, + ).compute() + print(res.anomaly.sel(time="2045")) + np.testing.assert_almost_equal(res.anomaly.sel(time="2045"), 0.81643836) + + +def test_custom_index_anomaly__groupby_and_resample_day(): + tas = stub_tas(2.0) + tas.loc[{"time": "2045-01-01"}] = 300 + res = icclim.custom_index( + in_files=tas, + slice_mode="day", + base_period_time_range=[datetime(2042, 1, 1), datetime(2044, 12, 31)], + sampling_method="groupby_ref_and_resample_study", + user_index={ + "index_name": "anomaly", + "calc_operation": CalcOperationRegistry.ANOMALY, + }, + ).compute() + np.testing.assert_almost_equal(res.anomaly.sel(time="2045-01-01"), 298) + + +def test_custom_index_anomaly__groupby_and_resample_hour(): + tas = stub_tas(2.0) + tas.loc[{"time": "2045-01-01"}] = 300 + with pytest.raises(NotImplementedError): + icclim.custom_index( + in_files=tas, + slice_mode="hour", + base_period_time_range=[datetime(2042, 1, 1), datetime(2044, 12, 31)], + sampling_method="groupby_ref_and_resample_study", + user_index={ + "index_name": "anomaly", + "calc_operation": CalcOperationRegistry.ANOMALY, + }, + ) + + +def test_custom_index_anomaly__groupby_season(): + tas = stub_tas(2.0) + tas.loc[{"time": "2045-01-01"}] = 300 + res = icclim.custom_index( + in_files=tas, + slice_mode=["season", [12, 1]], + base_period_time_range=[datetime(2042,
1, 1), datetime(2044, 12, 31)], + sampling_method="groupby", + user_index={ + "index_name": "anomaly", + "calc_operation": CalcOperationRegistry.ANOMALY, + }, + ).compute() + # missing values algo applied for first and last years np.testing.assert_almost_equal(res.anomaly, 0.96129032) + + +def test_custom_index_anomaly__groupby_month(): + tas = stub_tas(2.0) + tas.loc[{"time": "2045-01-01"}] = 300 + res = icclim.custom_index( + in_files=tas, + slice_mode="month", + base_period_time_range=[datetime(2042, 1, 1), datetime(2044, 12, 31)], + sampling_method="groupby", + user_index={ + "index_name": "anomaly", + "calc_operation": CalcOperationRegistry.ANOMALY, + }, + ).compute() + # missing values algo applied for first and last years + assert len(res.anomaly.month) == 12 + np.testing.assert_almost_equal(res.anomaly.sel(month=2), 0) + np.testing.assert_almost_equal(res.anomaly.sel(month=1), 1.92258065) diff --git a/icclim/tests/test_index_group.py b/icclim/tests/test_index_group.py index ace024ba..c73f6bed 100644 --- a/icclim/tests/test_index_group.py +++ b/icclim/tests/test_index_group.py @@ -2,25 +2,26 @@ import pytest -from icclim.models.index_group import IndexGroup +from icclim.icclim_exceptions import InvalidIcclimArgumentError +from icclim.models.index_group import IndexGroupRegistry @pytest.mark.parametrize( "gr", [ - ("temperature", IndexGroup.TEMPERATURE), - ("heat", IndexGroup.HEAT), - ("cold", IndexGroup.COLD), - ("drought", IndexGroup.DROUGHT), - ("rain", IndexGroup.RAIN), - ("snow", IndexGroup.SNOW), - ("compound", IndexGroup.COMPOUND), + ("temperature", IndexGroupRegistry.TEMPERATURE), + ("heat", IndexGroupRegistry.HEAT), + ("cold", IndexGroupRegistry.COLD), + ("drought", IndexGroupRegistry.DROUGHT), + ("rain", IndexGroupRegistry.RAIN), + ("snow", IndexGroupRegistry.SNOW), + ("compound", IndexGroupRegistry.COMPOUND), ], ) def test_lookup_success(gr): - assert IndexGroup.lookup(gr[0]) is gr[1] + assert IndexGroupRegistry.lookup(gr[0]) == gr[1] def test_lookup_error(): - with pytest.raises(NotImplementedError): - IndexGroup.lookup("coin coin le canard") + with pytest.raises(InvalidIcclimArgumentError): + IndexGroupRegistry.lookup("coin coin le canard") diff --git a/icclim/tests/test_input_parsing.py b/icclim/tests/test_input_parsing.py index c5067179..8248db19 100644 --- a/icclim/tests/test_input_parsing.py +++ b/icclim/tests/test_input_parsing.py @@ -2,7 +2,6 @@ import os import shutil -from unittest.mock import MagicMock, patch import numpy as np import pandas as pd @@ -10,10 +9,10 @@ import xarray as xr from xclim.core.utils import PercentileDataArray -from icclim.ecad.ecad_indices import EcadIndex +from icclim.ecad.ecad_indices import EcadIndexRegistry from icclim.icclim_exceptions import InvalidIcclimArgumentError +from icclim.models.constants import UNITS_ATTRIBUTE_KEY from icclim.pre_processing.input_parsing import ( - InFileDictionary, guess_var_names, read_dataset, update_to_standard_coords, @@ -33,17 +32,14 @@ def test_update_to_standard_coords(): ), dims=["t", "latitude", "longitude"], name="pr", - attrs={"units": "kg m-2 d-1"}, + attrs={UNITS_ATTRIBUTE_KEY: "kg m-2 d-1"}, ) } ) # WHEN - res, revert = update_to_standard_coords(ds) + res = update_to_standard_coords(ds) # THEN - assert "lat" in res.coords assert "time" in res.coords - assert "lon" in res.coords - assert res.rename(revert).coords.keys() == ds.coords.keys() class Test_ReadDataset: @@ -62,11 +58,11 @@ def cleanup(self): coords=dict( latitude=[42], longitude=[42], - t=pd.date_range("2042-01-01", periods=10, 
freq="D"), + time=pd.date_range("2042-01-01", periods=10, freq="D"), ), - dims=["t", "latitude", "longitude"], + dims=["time", "latitude", "longitude"], name="pr", - attrs={"units": "kg m-2 d-1"}, + attrs={UNITS_ATTRIBUTE_KEY: "kg m-2 d-1"}, ) self.tas_da = xr.DataArray( data=np.full(10, 42).reshape((10, 1, 1)), @@ -77,7 +73,7 @@ def cleanup(self): ), dims=["t", "latitude", "longitude"], name="tas", - attrs={"units": "degC"}, + attrs={UNITS_ATTRIBUTE_KEY: "degC"}, ) yield # -- teardown @@ -98,18 +94,6 @@ def test_read_dataset_xr_DataArray__simple(self): # THEN assert "pr" in res.data_vars - def test_read_dataset_xr_DataArray__error_1_var_when_2_needed(self): - # THEN - with pytest.raises(InvalidIcclimArgumentError): - # WHEN - read_dataset(self.pr_da, EcadIndex.WW) - - def test_read_dataset_xr_DataArray__rename_var(self): - # WHEN - ds_res = read_dataset(self.pr_da, EcadIndex.TX90P) - # THEN - xr.testing.assert_equal(ds_res.tasmax, self.pr_da) - def test_read_dataset_xr_da_user_index_success(self): # WHEN ds_res = read_dataset(self.pr_da, None) @@ -164,15 +148,10 @@ def test_read_dataset(self): ds = xr.Dataset({"tas": self.tas_da}) ds.to_netcdf(self.OUTPUT_NC_FILE) # WHEN - res_ds = read_dataset( - in_data={"ninja": self.OUTPUT_NC_FILE, "precipitoto": self.pr_da} - ) + res_ds = read_dataset(self.OUTPUT_NC_FILE) # THEN # asserts variable names are the ones in the actual DataArray/Datasets - assert "ninja" not in res_ds.data_vars - assert "precipitoto" in res_ds.data_vars assert "tas" in res_ds.data_vars - assert "pr" not in res_ds.data_vars def test_read_dataset__with_percentiles(self): # GIVEN @@ -184,82 +163,34 @@ def test_read_dataset__with_percentiles(self): per = PercentileDataArray.from_da( per, climatology_bounds=["1994-12-02", "1999-01-01"] ) + ds["tontontonthetatilotetatoux"] = per # WHEN - res_ds = read_dataset( - in_data={ - "tatas": { - "study": ds, - "thresholds": per, - "climatology_bounds": ("1994-12-02", "1999-01-01"), - "per_var_name": "tontontonthetatilotetatoux", - } - } - ) + res_ds = read_dataset(ds) # THEN assert "tas" in res_ds.data_vars - # A bit weird that - assert "tatas_thresholds" in res_ds.data_vars - - def test_read_dataset__error_no_percentiles_dimension(self): - # GIVEN - ds = xr.Dataset({"tas": self.tas_da}) - ds.to_netcdf(self.OUTPUT_NC_FILE) - # WHEN - tas: InFileDictionary = { - "study": ds, - "thresholds": self.tas_da, - } - # THEN - with pytest.raises(InvalidIcclimArgumentError): - # WHEN - read_dataset(in_data={"tatas": tas}) - - def test_guess_variables__error_no_index(self): - # GIVEN - ds = xr.Dataset({"tas": self.tas_da}) - # THEN - with pytest.raises(InvalidIcclimArgumentError): - # WHEN - guess_var_names(ds) + assert "tontontonthetatilotetatoux" in res_ds.data_vars - def test_guess_variables__error_too_many_args(self): + def test_guess_variables__cant_guess_var_name(self): # GIVEN - ds = xr.Dataset({"tas": self.tas_da}) + ds = xr.Dataset({"canard": self.tas_da, "bergeronnette": self.tas_da}) # THEN with pytest.raises(InvalidIcclimArgumentError): # WHEN - guess_var_names(ds, in_data={}, var_names=["coin-coin"]) - - def test_guess_variables__error_wrong_name_for_index(self): - # GIVEN - ds = xr.Dataset({"tas": self.tas_da}) - # THEN - with pytest.raises(InvalidIcclimArgumentError): - # WHEN - guess_var_names(ds, index=EcadIndex.DTR.climate_index) + guess_var_names(ds, standard_index=EcadIndexRegistry.SU, var_names=None) def test_guess_variables__simple(self): # GIVEN ds = xr.Dataset({"tas": self.tas_da}) # WHEN - res = guess_var_names(ds, 
index=EcadIndex.TG.climate_index) + res = guess_var_names(ds, standard_index=EcadIndexRegistry.TG, var_names=None) # THEN assert res == ["tas"] - @patch("icclim.pre_processing.input_parsing.InFileType") - def test_guess_variables__from_dict(self, in_file_mock: MagicMock): - # GIVEN - ds = xr.Dataset({"tas": self.tas_da}) - # WHEN - res = guess_var_names(ds, in_data={"pouet": in_file_mock}) - # THEN - assert res == ["pouet"] - def test_guess_variables__from_string(self): # GIVEN ds = xr.Dataset({"tas": self.tas_da}) # WHEN - res = guess_var_names(ds, var_names="cocoLasticot") + res = guess_var_names(ds, standard_index=None, var_names="cocoLasticot") # THEN assert res == ["cocoLasticot"] @@ -267,14 +198,14 @@ def test_guess_variables__from_list(self): # GIVEN ds = xr.Dataset({"tas": self.tas_da}) # WHEN - res = guess_var_names(ds, var_names=["pinçon"]) + res = guess_var_names(ds, standard_index=None, var_names=["pinçon"]) # THEN assert res == ["pinçon"] def test_guess_variables__from_alias(self): # GIVEN - ds = xr.Dataset({"tasmaxAdjust": self.tas_da}) + ds = xr.Dataset({"tasmaxAdjust": self.tas_da, "turlututut": self.tas_da}) # WHEN - res = guess_var_names(ds, index=EcadIndex.SU.climate_index) + res = guess_var_names(ds, standard_index=EcadIndexRegistry.SU, var_names=None) # THEN assert res == ["tasmaxAdjust"] diff --git a/icclim/tests/test_main.py b/icclim/tests/test_main.py index 17d8f742..9d042a36 100644 --- a/icclim/tests/test_main.py +++ b/icclim/tests/test_main.py @@ -11,11 +11,17 @@ import xarray as xr import icclim -from icclim.ecad.ecad_indices import EcadIndex, get_season_excluded_indices -from icclim.icclim_exceptions import InvalidIcclimArgumentError -from icclim.models.constants import ICCLIM_VERSION -from icclim.models.frequency import Frequency -from icclim.models.index_group import IndexGroup +from icclim.ecad.ecad_indices import EcadIndexRegistry +from icclim.models.constants import ( + ICCLIM_VERSION, + PART_OF_A_WHOLE_UNIT, + REFERENCE_PERIOD_ID, + UNITS_ATTRIBUTE_KEY, +) +from icclim.models.frequency import FrequencyRegistry +from icclim.models.index_group import IndexGroupRegistry +from icclim.models.threshold import Threshold +from icclim.tests.testing_utils import K2C, stub_pr, stub_tas @patch("icclim.main.index") @@ -52,14 +58,14 @@ class Test_Integration: data=(np.full(len(TIME_RANGE), 20).reshape((len(TIME_RANGE), 1, 1))), dims=["time", "lat", "lon"], coords=dict(lat=[42], lon=[42], time=TIME_RANGE), - attrs={"units": "degC"}, + attrs={UNITS_ATTRIBUTE_KEY: "degC"}, ) data_cf_time = xr.DataArray( data=(np.full(len(TIME_RANGE), 20).reshape((len(TIME_RANGE), 1, 1))), dims=["time", "lat", "lon"], coords=dict(lat=[42], lon=[42], time=CF_TIME_RANGE), - attrs={"units": "degC"}, + attrs={UNITS_ATTRIBUTE_KEY: "degC"}, ) # usually, time_bounds is not properly decoded and keeps an object dtype @@ -85,17 +91,17 @@ def cleanup(self): pass def test_index_SU(self): + tas = stub_tas(tas_value=26 + K2C) + tas[:5] = 0 res = icclim.index( - indice_name="SU", - in_files=self.data, - out_file=self.OUTPUT_FILE, + index_name="SU", in_files=tas, out_file=self.OUTPUT_FILE, slice_mode="ms" ) assert f"icclim version: {ICCLIM_VERSION}" in res.attrs["history"] - np.testing.assert_array_equal(0, res.SU) + assert res.SU.isel(time=0) == 26 # January def test_index_SU__on_dataset(self): res = icclim.index( - indice_name="SU", + index_name="SU", var_name="data", in_files=self.dataset_with_time_bounds, out_file=self.OUTPUT_FILE, @@ -103,31 +109,29 @@ def test_index_SU__on_dataset(self): assert f"icclim 
version: {ICCLIM_VERSION}" in res.attrs["history"] np.testing.assert_array_equal(0, res.SU) - def test_index_SU__custom_threshold(self): - res = icclim.su(in_files=self.data, out_file=self.OUTPUT_FILE, threshold=42) - assert f"icclim version: {ICCLIM_VERSION}" in res.attrs["history"] - assert res.coords["thresholds"] == 42 - np.testing.assert_array_equal(0, res.SU) - - def test_index_SU__multiple_thresholds(self): - res = icclim.su( - in_files=self.data, out_file=self.OUTPUT_FILE, threshold=[42, 53] + def test_index_DTR(self): + ds = self.data.to_dataset(name="toto") + ds["tutu"] = self.data + res = icclim.index( + index_name="DTR", + in_files=ds, + out_file=self.OUTPUT_FILE, + var_name=["toto", "tutu"], ) - assert res.attrs["title"] == "Index SU on threshold(s) [42, 53]" - np.testing.assert_array_equal(res.coords["thresholds"], [42, 53]) - np.testing.assert_array_equal(0, res.SU) + assert f"icclim version: {ICCLIM_VERSION}" in res.attrs["history"] + np.testing.assert_array_equal(0, res.DTR) - def test_index_TX90p__multiple_thresholds(self): - res = icclim.tx90p( - in_files=self.data, + def test_index_CD(self): + ds = self.data.to_dataset(name="tas") + ds["pr"] = self.data.copy(deep=True) + ds["pr"].attrs[UNITS_ATTRIBUTE_KEY] = "kg m-2 d-1" + res = icclim.index( + index_name="CD", + in_files=ds, out_file=self.OUTPUT_FILE, - threshold=[42, 53], - save_percentile=True, ) - assert res.attrs["title"] == "Index TX90p on threshold(s) [42, 53]" - np.testing.assert_array_equal(res.coords["percentiles"], [42, 53]) - assert res.percentiles is not None - np.testing.assert_array_equal(0, res.TX90p) + assert f"icclim version: {ICCLIM_VERSION}" in res.attrs["history"] + np.testing.assert_array_equal(0, res.CD) def test__preserve_initial_history(self): self.data.attrs["history"] = "pouet pouet cacahuête" @@ -137,13 +141,13 @@ def test__preserve_initial_history(self): def test_index_SU__time_selection(self): # WHEN res_string_dates = icclim.index( - indice_name="SU", + index_name="SU", in_files=self.data, out_file=self.OUTPUT_FILE, time_range=("19 july 2042", "14 august 2044"), ) res_datetime_dates = icclim.index( - indice_name="SU", + index_name="SU", in_files=self.data, out_file=self.OUTPUT_FILE, time_range=[datetime(2042, 7, 19), datetime(2044, 8, 14)], @@ -161,7 +165,7 @@ def test_index_SU__time_selection(self): def test_index_SU__pandas_time_slice_mode(self): # WHEN res = icclim.index( - indice_name="SU", + index_name="SU", in_files=self.data, out_file=self.OUTPUT_FILE, slice_mode="2W-WED", @@ -169,13 +173,17 @@ def test_index_SU__pandas_time_slice_mode(self): # THEN assert res.time_bounds[0, 0] == np.datetime64(datetime(2042, 1, 1)) assert res.time_bounds[0, 1] == np.datetime64(datetime(2042, 1, 14)) + assert ( + res.SU.attrs["standard_name"] + == "number_of_days_with_maximum_air_temperature_above_threshold" + ) def test_index_SU__monthy_sampled(self): res = icclim.index( - indice_name="SU", + index_name="SU", in_files=self.data, out_file=self.OUTPUT_FILE, - slice_mode=Frequency.MONTH, + slice_mode=FrequencyRegistry.MONTH, ) np.testing.assert_array_equal(0, res.SU) np.testing.assert_array_equal( @@ -184,10 +192,10 @@ def test_index_SU__monthy_sampled(self): def test_index_SU__monthy_sampled_cf_time(self): res = icclim.index( - indice_name="SU", + index_name="SU", in_files=self.data_cf_time, out_file=self.OUTPUT_FILE, - slice_mode=Frequency.MONTH, + slice_mode=FrequencyRegistry.MONTH, ) np.testing.assert_array_equal(0, res.SU) np.testing.assert_array_equal( @@ -202,10 +210,10 @@ def 
test_index_SU__monthy_sampled_cf_time(self): def test_index_SU__DJF_cf_time(self): res = icclim.index( - indice_name="SU", + index_name="SU", in_files=self.data_cf_time, out_file=self.OUTPUT_FILE, - slice_mode=Frequency.DJF, + slice_mode=FrequencyRegistry.DJF, ) np.testing.assert_array_equal(res.SU.isel(time=0), np.NAN) np.testing.assert_array_equal(res.SU.isel(time=1), 0) @@ -220,9 +228,11 @@ def test_index_SU__DJF_cf_time(self): 2042, 2, 28, 0, 0, 0, 0 ) - def test_indices_from_DataArray(self): + def test_indices__from_DataArray(self): res = icclim.indices( - index_group=IndexGroup.HEAT, in_files=self.data, out_file=self.OUTPUT_FILE + index_group=IndexGroupRegistry.HEAT, + in_files=self.data, + out_file=self.OUTPUT_FILE, ) for i in HEAT_INDICES: assert res[i] is not None @@ -230,11 +240,13 @@ def test_indices_from_DataArray(self): def test_indices__snow_indices(self): ds = self.data.to_dataset(name="tas") ds["prec"] = self.data.copy(deep=True) - ds["prec"].attrs["units"] = "cm" + ds["prec"].attrs[UNITS_ATTRIBUTE_KEY] = "cm" res = icclim.indices( - index_group=IndexGroup.SNOW, in_files=ds, out_file=self.OUTPUT_FILE + index_group=IndexGroupRegistry.SNOW, in_files=ds, out_file=self.OUTPUT_FILE ) - for i in filter(lambda i: i.group == IndexGroup.SNOW, EcadIndex): + for i in filter( + lambda i: i.group == IndexGroupRegistry.SNOW, EcadIndexRegistry.values() + ): assert res[i.short_name] is not None def test_indices_all_from_Dataset(self): @@ -242,98 +254,79 @@ def test_indices_all_from_Dataset(self): ds["tasmax"] = self.data ds["tasmin"] = self.data ds["pr"] = self.data.copy(deep=True) - ds["pr"].attrs["units"] = "kg m-2 d-1" + ds["pr"].attrs[UNITS_ATTRIBUTE_KEY] = "kg m-2 d-1" ds["prec"] = self.data.copy(deep=True) - ds["prec"].attrs["units"] = "cm" + ds["prec"].attrs[UNITS_ATTRIBUTE_KEY] = "cm" res = icclim.indices(index_group="all", in_files=ds, out_file=self.OUTPUT_FILE) - for i in EcadIndex: + for i in EcadIndexRegistry.values(): assert res[i.short_name] is not None - def test_indices_all_from_Dataset__seasonal_clip(self): + def test_indices_all_from_Dataset__seasonal(self): ds = self.data.to_dataset(name="tas") ds["tasmax"] = self.data ds["tasmin"] = self.data ds["pr"] = self.data.copy(deep=True) - ds["pr"].attrs["units"] = "kg m-2 d-1" + ds["pr"].attrs[UNITS_ATTRIBUTE_KEY] = "kg m-2 d-1" ds["prec"] = self.data.copy(deep=True) - ds["prec"].attrs["units"] = "cm" + ds["prec"].attrs[UNITS_ATTRIBUTE_KEY] = "cm" res = icclim.indices( index_group="all", in_files=ds, out_file=self.OUTPUT_FILE, - slice_mode=["clipped_season", [1, 2, 3]], + slice_mode=["season", [1, 2, 3]], ) - for i in EcadIndex: + for i in EcadIndexRegistry.values(): assert res[i.short_name] is not None - def test_indices_all_from_Dataset__between_dates_seasonal_clip(self): + def test_indices_all_from_Dataset__between_dates_seasonal(self): ds = self.data.to_dataset(name="tas") ds["tasmax"] = self.data ds["tasmin"] = self.data ds["pr"] = self.data.copy(deep=True) - ds["pr"].attrs["units"] = "kg m-2 d-1" + ds["pr"].attrs[UNITS_ATTRIBUTE_KEY] = "kg m-2 d-1" ds["prec"] = self.data.copy(deep=True) - ds["prec"].attrs["units"] = "cm" + ds["prec"].attrs[UNITS_ATTRIBUTE_KEY] = "cm" res = icclim.indices( index_group="all", in_files=ds, out_file=self.OUTPUT_FILE, - slice_mode=["clipped_season", ["07-19", "08-14"]], + slice_mode=["season", ["07-19", "08-14"]], ) - for i in EcadIndex: + for i in EcadIndexRegistry.values(): assert res[i.short_name] is not None - def test_indices_all_from_Dataset__JFM_seasonal_clip(self): + def 
test_indices_all_from_Dataset__JFM_seasonal(self): ds = self.data.to_dataset(name="tas") ds["tasmax"] = self.data ds["tasmin"] = self.data ds["pr"] = self.data.copy(deep=True) - ds["pr"].attrs["units"] = "kg m-2 d-1" + ds["pr"].attrs[UNITS_ATTRIBUTE_KEY] = "kg m-2 d-1" ds["prec"] = self.data.copy(deep=True) - ds["prec"].attrs["units"] = "cm" + ds["prec"].attrs[UNITS_ATTRIBUTE_KEY] = "cm" res = icclim.indices( index_group="all", in_files=ds, out_file=self.OUTPUT_FILE, - slice_mode=["clipped_season", [1, 2, 3]], + slice_mode=["season", [1, 2, 3]], ) - for i in EcadIndex: + for i in EcadIndexRegistry.values(): assert res[i.short_name] is not None - def test_indices_all_from_Dataset__seasonal_error(self): - # GIVEN + def test_indices_all_from_Dataset__between_year_season(self): ds = self.data.to_dataset(name="tas") ds["tasmax"] = self.data ds["tasmin"] = self.data ds["pr"] = self.data.copy(deep=True) - ds["pr"].attrs["units"] = "kg m-2 d-1" + ds["pr"].attrs[UNITS_ATTRIBUTE_KEY] = "kg m-2 d-1" ds["prec"] = self.data.copy(deep=True) - ds["prec"].attrs["units"] = "cm" - # THEN - with pytest.raises(InvalidIcclimArgumentError): - # WHEN - icclim.indices( - index_group="all", - in_files=ds, - out_file=self.OUTPUT_FILE, - slice_mode=["season", [1, 2, 3]], - ) - - def test_indices_all_from_Dataset__between_year_clipped_season(self): - ds = self.data.to_dataset(name="tas") - ds["tasmax"] = self.data - ds["tasmin"] = self.data - ds["pr"] = self.data.copy(deep=True) - ds["pr"].attrs["units"] = "kg m-2 d-1" - ds["prec"] = self.data.copy(deep=True) - ds["prec"].attrs["units"] = "cm" + ds["prec"].attrs[UNITS_ATTRIBUTE_KEY] = "cm" res = icclim.indices( index_group="all", in_files=ds, out_file=self.OUTPUT_FILE, - slice_mode=["clipped_season", [12, 1, 2, 3]], + slice_mode=["season", [12, 1, 2, 3]], ) - for i in EcadIndex: + for i in EcadIndexRegistry.values(): assert res[i.short_name] is not None def test_indices_all_ignore_error(self): @@ -341,7 +334,7 @@ def test_indices_all_ignore_error(self): ds["tasmax"] = self.data ds["tasmin"] = self.data ds["pr"] = self.data.copy(deep=True) - ds["pr"].attrs["units"] = "kg m-2 d-1" + ds["pr"].attrs[UNITS_ATTRIBUTE_KEY] = "kg m-2 d-1" res: xr.Dataset = icclim.indices( index_group="all", in_files=ds, @@ -349,9 +342,9 @@ def test_indices_all_ignore_error(self): ignore_error=True, slice_mode="DJF", ).compute() - for i in EcadIndex: + for i in EcadIndexRegistry.values(): # No variable in input to compute snow indices - if i.group == IndexGroup.SNOW or i in get_season_excluded_indices(): + if i.group == IndexGroupRegistry.SNOW: assert res.data_vars.get(i.short_name, None) is None else: assert res[i.short_name] is not None @@ -361,7 +354,7 @@ def test_indices_all__error(self): ds["tasmax"] = self.data ds["tasmin"] = self.data ds["pr"] = self.data.copy(deep=True) - ds["pr"].attrs["units"] = "kg m-2 d-1" + ds["pr"].attrs[UNITS_ATTRIBUTE_KEY] = "kg m-2 d-1" with pytest.raises(Exception): icclim.indices( index_group="all", @@ -369,3 +362,260 @@ def test_indices_all__error(self): out_file=self.OUTPUT_FILE, ignore_error=False, ) + + def test_index_TR(self): + tas = stub_tas(tas_value=26 + K2C) + tas[:5] = 0 + res = icclim.index( + index_name="TR", in_files=tas, out_file=self.OUTPUT_FILE, slice_mode="ms" + ) + assert f"icclim version: {ICCLIM_VERSION}" in res.attrs["history"] + assert res.TR.isel(time=0) == 26 # January + + def test_index_prcptot(self): + pr = stub_pr(value=2) + pr[:10] = 0 + res = icclim.index( + index_name="prcptot", + in_files=pr, + out_file=self.OUTPUT_FILE, + 
slice_mode="ms", + ) + assert res.isel(time=0) == 42.0 + + def test_index_csu(self): + tas = stub_tas(tas_value=26 + K2C) + tas[10:15] = 0 + res = icclim.index( + index_name="csu", in_files=tas, out_file=self.OUTPUT_FILE, slice_mode="ms" + ) + assert res.isel(time=0) == 16 + + def test_index_gd4(self): + tas = stub_tas(tas_value=26 + K2C) + tas[5:15] = 0 + res = icclim.index( + index_name="gd4", in_files=tas, out_file=self.OUTPUT_FILE, slice_mode="ms" + ) + expected = (26 - 4) * 21 + assert ( + res.isel(time=0) == expected + ) # 21 days in January above 4 degC (at 26degC) + + def test_index_cfd(self): + tas = stub_tas(tas_value=26 + K2C) + tas[5:15] = 0 + res = icclim.index( + index_name="cfd", in_files=tas, out_file=self.OUTPUT_FILE, slice_mode="ms" + ) + assert res.isel(time=0) == 1 + + def test_index_fd(self): + tas = stub_tas(tas_value=26 + K2C) + tas[5:15] = 0 + tas[20:25] = 0 + res = icclim.index( + index_name="fd", in_files=tas, out_file=self.OUTPUT_FILE, slice_mode="ms" + ) + assert res.isel(time=0) == 15 + + def test_index_hd17(self): + tas = stub_tas(tas_value=27 + K2C) + tas[5:10] = 0 + res = icclim.index( + index_name="hd17", in_files=tas, out_file=self.OUTPUT_FILE, slice_mode="ms" + ) + assert res.isel(time=0) == 5 * (17 + K2C) + + def test_index_tx90p__no_bootstrap_because_one_single_year_of_ref(self): + tas = stub_tas(tas_value=27 + K2C) + tas[5:15] = 0 + res = icclim.index( + index_name="tx90p", + in_files=tas, + doy_window_width=5, + base_period_time_range=("2042-01-01", "2042-12-31"), + time_range=("2042-01-01", "2045-12-31"), + out_file=self.OUTPUT_FILE, + slice_mode="ms", + ) + assert REFERENCE_PERIOD_ID not in res.TX90p.attrs + # The 90th percentile here is clipped to the maximum of tas window (27 degC) + # due to the "median_unbiased" interpolation. + # Thus no value are strictly above it. 
+ assert res.TX90p.isel(time=0) == 0 + + def test_index_tx90p__no_bootstrap_because_no_overlap(self): + tas = stub_tas(tas_value=27 + K2C) + tas[5:10] = 0 + res = icclim.index( + index_name="tx90p", + in_files=tas, + doy_window_width=1, + time_range=("2043-01-01", "2045-12-31"), + base_period_time_range=("2042-01-01", "2042-12-31"), + out_file=self.OUTPUT_FILE, + slice_mode="ms", + ) + assert REFERENCE_PERIOD_ID not in res.TX90p.attrs + # resample_doy adds a day where the 90th percentile is below tas + assert res.TX90p.isel(time=0) == 6 + + def test_index_tx90p__bootstrap_2_years(self): + tas = stub_tas(tas_value=27 + K2C) + tas[5:10] = 0 + res = icclim.index( + index_name="tx90p", + in_files=tas, + doy_window_width=1, + time_range=("2042-01-01", "2045-12-31"), + base_period_time_range=("2042-01-01", "2043-12-31"), + out_file=self.OUTPUT_FILE, + slice_mode="ms", + ) + assert REFERENCE_PERIOD_ID in res.TX90p.attrs + # 2042 values are compared to 2043's 90th percentile due to bootstrap + assert res.TX90p.sel(time="2042-01") == 0 + # 2043 values are compared to 2042's 90th percentile due to bootstrap + assert res.TX90p.sel(time="2043-01") == 5 + + def test_index_wsdi__no_bootstrap_because_no_overlap(self): + tas = stub_tas(tas_value=27 + K2C) + tas[0:10] = 0 + res = icclim.index( + index_name="wsdi", + in_files=tas, + doy_window_width=1, + time_range=("2043-01-01", "2045-12-31"), + base_period_time_range=("2042-01-01", "2042-12-31"), + out_file=self.OUTPUT_FILE, + slice_mode="ms", + ) + assert REFERENCE_PERIOD_ID not in res.WSDI.attrs + # 1 more day than in tas because of resample_doy, which interpolates values + assert res.WSDI.isel(time=0) == 11 + + def test_index_csdi__no_bootstrap_because_no_overlap(self): + tas = stub_tas(tas_value=2 + K2C) + tas[0:10] = 35 + K2C + res = icclim.index( + index_name="csdi", + in_files=tas, + doy_window_width=1, + time_range=("2043-01-01", "2045-12-31"), + base_period_time_range=("2042-01-01", "2042-12-31"), + out_file=self.OUTPUT_FILE, + slice_mode="ms", + ) + assert REFERENCE_PERIOD_ID not in res.CSDI.attrs + print(res.CSDI.isel(time=0).compute()) + # 1 more day than in tas because of resample_doy, which interpolates values + assert res.CSDI.isel(time=0) == 11 + + def test_count_occurrences__date_event(self): + tas = stub_tas(tas_value=2 + K2C) + tas[10] = 35 + K2C + res = icclim.index( + tas, + var_name=["tmin"], + index_name="count_occurrences", + threshold=">= 22 degree_Celsius", + slice_mode="month", + date_event=True, + ).compute() + assert "event_date_start" in res.coords + assert "event_date_end" in res.coords + assert res.count_occurrences.isel(time=0).event_date_end == np.datetime64( + "2042-01-11" + ) + assert res.count_occurrences.isel(time=0).event_date_start == np.datetime64( + "2042-01-11" + ) + + def test_count_occurrences__to_percent(self): + tas = stub_tas(tas_value=2 + K2C) + tas[10] = 35 + K2C + res = icclim.index( + tas, + var_name=["tmin"], + index_name="count_occurrences", + threshold=">= 22 degree_Celsius", + slice_mode="month", + out_unit="%", + ).compute() + assert res.count_occurrences.attrs[UNITS_ATTRIBUTE_KEY] == "%" + assert res.count_occurrences.isel(time=0) == 1 / 31 * 100 + + def test_excess__on_doy_percentile(self): + tas = stub_tas(tas_value=10 + K2C).rename("tas") + tas[10] = 5 + K2C + res = icclim.index( + tas, + index_name="excess", + time_range=["2044-01-01", "2045-12-31"], + threshold=Threshold( + "10 doy_per", + doy_window_width=1, + reference_period=["2042-01-01", "2042-12-31"], + ), + slice_mode="month", + save_thresholds=True, + 
).compute() + # not exactly 5 because of resample_doy interpolation + np.testing.assert_almost_equal(res.excess.isel(time=0), 5.01369863) + print(res) + assert "tas_thresholds" in res.data_vars + + def test_deficit__on_doy_percentile(self): + tas = stub_tas(tas_value=5 + K2C).rename("tas") + tas[10] = 10 + K2C + res = icclim.index( + tas, + index_name="deficit", + time_range=["2044-01-01", "2045-12-31"], + threshold=Threshold( + "10 doy_per", + doy_window_width=1, + reference_period=["2042-01-01", "2042-12-31"], + ), + slice_mode="month", + save_thresholds=True, + ).compute() + # not exactly 5 because of resample_doy interpolation + np.testing.assert_almost_equal(res.deficit.isel(time=0), 5.01369863) + assert "tas_thresholds" in res.data_vars + + def test_fraction_of_total(self): + tas = stub_tas(tas_value=25 + K2C).rename("tas") + tas[tas.time.dt.date == np.datetime64("2042-06-10")] = 10 + K2C + res = icclim.index( + tas, + index_name="fraction_of_total", + threshold="> 20 degree_Celsius", + slice_mode="jja", + ).compute() + np.testing.assert_almost_equal(res.fraction_of_total.isel(time=0), 0.98967164) + assert res.fraction_of_total.isel(time=1) == 1 + assert res.fraction_of_total.attrs[UNITS_ATTRIBUTE_KEY] == PART_OF_A_WHOLE_UNIT + + def test_fraction_of_total_percent(self): + tas = stub_tas(tas_value=25 + K2C).rename("tas") + tas[tas.time.dt.date == np.datetime64("2042-06-10")] = 10 + K2C + res = icclim.index( + tas, + index_name="fraction_of_total", + threshold="> 20 degree_Celsius", + out_unit="%", + slice_mode="jja", + ).compute() + np.testing.assert_almost_equal(res.fraction_of_total.isel(time=0), 98.96716372) + assert res.fraction_of_total.isel(time=1) == 100 + assert res.fraction_of_total.attrs[UNITS_ATTRIBUTE_KEY] == "%" + + def test_std(self): + tas = stub_tas(tas_value=25 + K2C).rename("tas") + res = icclim.index( + tas, + index_name="standard_deviation", + ).compute() + np.testing.assert_almost_equal(res.standard_deviation.isel(time=0), 0) diff --git a/icclim/tests/test_operators.py b/icclim/tests/test_operators.py deleted file mode 100644 index 3f1dea31..00000000 --- a/icclim/tests/test_operators.py +++ /dev/null @@ -1,282 +0,0 @@ -from __future__ import annotations - -import numpy as np -import pytest -import xarray as xr - -from icclim.models.user_index_config import ( - ExtremeMode, - LinkLogicalOperation, - LogicalOperation, -) -from icclim.tests.testing_utils import stub_tas -from icclim.user_indices.operators import ( - _apply_coef, - anomaly, - count_events, - max, - max_consecutive_event_count, - mean, - min, - run_mean, - run_sum, - sum, -) - - -class Test_apply_coef: - @pytest.mark.parametrize("use_dask", [True, False]) - def test_simple(self, use_dask): - # GIVEN - da = stub_tas(use_dask=use_dask) - # WHEN - result = _apply_coef(4.0, da) - # THEN - np.testing.assert_equal(result.data, 4.0) - - -class Test_max: - @pytest.mark.parametrize("use_dask", [True, False]) - def test_simple(self, use_dask): - da = stub_tas(use_dask=use_dask) - da.data[1] = 20 - # WHEN - result = max( - da=da, - coef=1, - logical_operation=None, - threshold=None, - freq="YS", - date_event=True, - ) - # THEN - assert result.data[0] == 20 - - -class Test_min: - @pytest.mark.parametrize("use_dask", [True, False]) - def test_simple(self, use_dask): - da = stub_tas(use_dask=use_dask) - da.data[1] = -20 - # WHEN - result = min(da=da, freq="YS") - # THEN - assert result.data[0] == -20 - - -class Test_mean: - @pytest.mark.parametrize("use_dask", [True, False]) - def test_simple(self, use_dask): - da 
= stub_tas(use_dask=use_dask) - da[2] = 366 - # WHEN - result = mean( - da=da, - freq="YS", - ) - # THEN - assert result.data[0] == 2 - - -class Test_sum: - @pytest.mark.parametrize("use_dask", [True, False]) - def test_simple(self, use_dask): - da = stub_tas(use_dask=use_dask) - # WHEN - result = sum(da=da, freq="YS") - # THEN - assert result.data[0] == 365 - - -class Test_count_events: - @pytest.mark.parametrize("use_dask", [True, False]) - def test_simple(self, use_dask): - # GIVEN - da = stub_tas(10, use_dask) - da[1] = 15 - da[2] = 16 - # WHEN - result = count_events( - das=[da], - in_base_das=[None], - logical_operation=[LogicalOperation.GREATER_THAN], - thresholds=[15], - freq="MS", - ) - # THEN - assert result[0] == 1 - - @pytest.mark.parametrize("use_dask", [True, False]) - def test_simple_percentile(self, use_dask): - # GIVEN - da = stub_tas(10, use_dask) - da[1] = 15 - da[2] = 16 - # WHEN - result = count_events( - das=[da], - in_base_das=[da], - logical_operation=[LogicalOperation.GREATER_THAN], - thresholds=["80p"], - freq="MS", - ) - # THEN - xr.testing.assert_duckarray_equal(result.isel(time=0), 2) - - @pytest.mark.parametrize("use_dask", [True, False]) - def test_multi_threshold_or(self, use_dask): - # GIVEN - tmax = stub_tas(10, use_dask) - tmax[1] = 15 - tmin = stub_tas(-10, use_dask) - # WHEN - result = count_events( - das=[tmax, tmin], - in_base_das=[None], - logical_operation=[LogicalOperation.GREATER_THAN, LogicalOperation.EQUAL], - thresholds=[12, -20], - link_logical_operations=LinkLogicalOperation.OR_STAMP, - freq="MS", - ) - # THEN - assert result[0] == 1 - - @pytest.mark.parametrize("use_dask", [True, False]) - def test_multi_threshold_and(self, use_dask): - # GIVEN - tmax = stub_tas(10, use_dask) - tmax[1] = 15 - tmin = stub_tas(-10, use_dask) - tmin[1] = -20 - # WHEN - result = count_events( - das=[tmax, tmin], - in_base_das=[None], - logical_operation=[LogicalOperation.GREATER_THAN, LogicalOperation.EQUAL], - thresholds=[12, -20], - link_logical_operations=LinkLogicalOperation.AND_STAMP, - freq="MS", - ) - # THEN - assert result[0] == 1 - - -class Test_run_mean: - @pytest.mark.parametrize("use_dask", [True, False]) - def test_simple_min(self, use_dask): - # GIVEN - tmax = stub_tas(10, use_dask) - tmax[30] = 0 - tmax[29] = 0 - tmax[28] = 0 - tmax[27] = 0 - tmax[26] = 0 - # WHEN - result = run_mean( - da=tmax, - extreme_mode=ExtremeMode.MIN, - window_width=5, - freq="MS", - ) - # THEN - assert result[0] == 0 - assert result[1] == 2 - assert result[2] == 10 - - @pytest.mark.parametrize("use_dask", [True, False]) - def test_simple_max(self, use_dask): - # GIVEN - tmax = stub_tas(10, use_dask) - tmax[30] = 20 - # WHEN - result = run_mean( - da=tmax, - extreme_mode=ExtremeMode.MAX, - window_width=2, - freq="MS", - ) - # THEN - assert result[0] == 15 - assert result[1] == 15 - assert result[2] == 10 - - -class Test_run_sum: - @pytest.mark.parametrize("use_dask", [True, False]) - def test_simple_min(self, use_dask): - # GIVEN - tmax = stub_tas(10, use_dask) - tmax[30] = 0 - tmax[29] = 0 - tmax[28] = 0 - tmax[27] = 0 - tmax[26] = 0 - # WHEN - result = run_sum( - da=tmax, - extreme_mode=ExtremeMode.MIN, - window_width=5, - freq="MS", - ) - # THEN - assert result[0] == 0 - assert result[1] == 10 - assert result[2] == 50 - - @pytest.mark.parametrize("use_dask", [True, False]) - def test_simple_max(self, use_dask): - # GIVEN - tmax = stub_tas(10, use_dask) - tmax[30] = 20 - # WHEN - result = run_sum( - da=tmax, - extreme_mode=ExtremeMode.MAX, - window_width=2, - freq="MS", 
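# Aside (illustrative, not part of the patch): the removed run_sum operator with
# extreme_mode "max" boils down to a rolling sum followed by a per-period max.
# A hypothetical xarray-only equivalent of this deleted test's setup; the stub
# values and the expected 30/30/20 results mirror the assertions just below.
import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range("2042-01-01", periods=365, freq="D")
tmax = xr.DataArray(np.full(365, 10.0), coords={"time": time}, dims=["time"])
tmax[30] = 20  # one warmer day on January 31st
rolling_sum = tmax.rolling(time=2).sum()  # window_width=2
monthly_max = rolling_sum.resample(time="MS").max()  # "max" mode, "MS" freq
# January and February each have a window containing the 20-degree day -> 30;
# March and later months only see 10-degree days -> 20.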
- ) - # THEN - assert result[0] == 30 - assert result[1] == 30 - assert result[2] == 20 - - -class Test_max_consecutive_event_count: - @pytest.mark.parametrize("use_dask", [True, False]) - def test_simple(self, use_dask): - # GIVEN - tmax = stub_tas(10, use_dask) - tmax[30] = 15 # On 31th january - # WHEN - result = max_consecutive_event_count( - da=tmax, - logical_operation=LogicalOperation.EQUAL, - threshold=10.0, - freq="YS", - ) - # THEN - assert result[0] == 334 - assert result[1] == 365 - - -class Test_anomaly: - @pytest.mark.parametrize("use_dask", [True, False]) - def test_simple(self, use_dask): - # GIVEN - tmax = stub_tas(10, use_dask) - tmax2 = stub_tas(11, use_dask) - # WHEN - result = anomaly(da_ref=tmax, da=tmax2, percent=False) - # THEN - assert result == 1 - - @pytest.mark.parametrize("use_dask", [True, False]) - def test_simple_percent(self, use_dask): - # GIVEN - tmax = stub_tas(10, use_dask) - tmax2 = stub_tas(11, use_dask) - # WHEN - result = anomaly(da_ref=tmax, da=tmax2, percent=True) - # THEN - assert result == 10 - assert result.attrs["units"] == "%" diff --git a/icclim/tests/test_rechunk.py b/icclim/tests/test_rechunk.py index a642f8a4..5f512df5 100644 --- a/icclim/tests/test_rechunk.py +++ b/icclim/tests/test_rechunk.py @@ -9,6 +9,7 @@ from icclim import create_optimized_zarr_store from icclim.icclim_exceptions import InvalidIcclimArgumentError +from icclim.models.constants import UNITS_ATTRIBUTE_KEY def test_create_optimized_zarr_store_success(): @@ -23,7 +24,7 @@ def test_create_optimized_zarr_store_success(): ), dims=["time", "lat", "lon"], name="pr", - attrs={"units": "kg m-2 d-1"}, + attrs={UNITS_ATTRIBUTE_KEY: "kg m-2 d-1"}, ) } ).chunk({"time": 2}) @@ -49,7 +50,7 @@ def test_create_optimized_zarr_store_error(): ), dims=["time", "lat", "lon"], name="pr", - attrs={"units": "kg m-2 d-1"}, + attrs={UNITS_ATTRIBUTE_KEY: "kg m-2 d-1"}, ) } ).chunk({"time": 2}) @@ -77,7 +78,7 @@ def test_create_optimized_zarr_store_no_rechunk(rechunk_mock: MagicMock): ), dims=["time", "lat", "lon"], name="pr", - attrs={"units": "kg m-2 d-1"}, + attrs={UNITS_ATTRIBUTE_KEY: "kg m-2 d-1"}, ) } ).chunk({"time": 2}) diff --git a/icclim/tests/test_user_index.py b/icclim/tests/test_user_index.py new file mode 100644 index 00000000..7480c556 --- /dev/null +++ b/icclim/tests/test_user_index.py @@ -0,0 +1,365 @@ +from __future__ import annotations + +from xclim.core.calendar import build_climatology_bounds + +import icclim +from icclim.models.constants import ( + UNITS_ATTRIBUTE_KEY, + USER_INDEX_PRECIPITATION_STAMP, + USER_INDEX_TEMPERATURE_STAMP, +) +from icclim.models.operator import OperatorRegistry +from icclim.tests.testing_utils import stub_tas + + +class Test_max: + def test_simple(self): + da = stub_tas(use_dask=False) + da.data[1] = 20 + # WHEN + result = icclim.index( + in_files=da, + user_index=dict( + index_name="data", calc_operation="max", coef=1, logical_operation=None + ), + ) + # THEN + assert result.data[0] == 20 + + +class Test_min: + def test_simple(self): + da = stub_tas(use_dask=False) + da.data[1] = -20 + # WHEN + result = icclim.index( + in_files=da, + user_index=dict(index_name="data", calc_operation="min"), + ) + # THEN + assert result.data[0] == -20 + + +class Test_mean: + def test_simple(self): + da = stub_tas(use_dask=False) + da[2] = 366 + # WHEN + result = icclim.index( + in_files=da, + user_index=dict(index_name="data", calc_operation="mean"), + ) + # THEN + assert result.data[0] == 2 + + +class Test_sum: + def test_simple(self): + da = 
stub_tas(use_dask=False) + # WHEN + result = icclim.index( + in_files=da, + user_index=dict(index_name="data", calc_operation="sum"), + slice_mode="year", + ) + # THEN + assert result.data[0] == 365 + + +class Test_count_events: + def test_simple(self): + # GIVEN + da = stub_tas(10, False) + da[1] = 15 + da[2] = 16 + # WHEN + result = icclim.index( + in_files=da, + user_index=dict( + index_name="data", + calc_operation="nb_events", + thresh=15, + logical_operation=OperatorRegistry.GREATER, + ), + slice_mode="month", + ) + # THEN + assert result.data[0] == 1 + + def test_simple_default_percentile(self): + # GIVEN + da = stub_tas(10, False) + da[1] = 15 + da[2] = 16 + # WHEN + result = icclim.index( + in_files=da, + user_index=dict( + index_name="data", + calc_operation="nb_events", + thresh="50p", + logical_operation=OperatorRegistry.GREATER, + ), + base_period_time_range=build_climatology_bounds(da), + slice_mode="month", + ) + # THEN + assert result.data.isel(time=0) == 2 + + def test_simple_period_percentile(self): + # GIVEN + da = stub_tas(10, False) + da[1] = 15 + da[2] = 16 + # WHEN + result = icclim.index( + in_files=da, + user_index=dict( + index_name="data", + calc_operation="nb_events", + thresh="50p", + var_type=USER_INDEX_PRECIPITATION_STAMP, + logical_operation=OperatorRegistry.GREATER, + ), + base_period_time_range=build_climatology_bounds(da), + slice_mode="month", + ) + # THEN + assert result.data.isel(time=0) == 2 + + def test_simple_doy_percentile(self): + # GIVEN + da = stub_tas(10, False) + da[1] = 15 + da[2] = 16 + # WHEN + result = icclim.index( + in_files=da, + user_index=dict( + index_name="data", + calc_operation="nb_events", + thresh="80p", + var_type=USER_INDEX_TEMPERATURE_STAMP, + logical_operation=OperatorRegistry.GREATER, + ), + base_period_time_range=build_climatology_bounds(da), + slice_mode="month", + ) + # THEN + assert result.data.isel(time=0) == 2 + + def test_multi_threshold_or(self): + # GIVEN + tmax = stub_tas(10, False) + tmax[1] = 15 + tmin = stub_tas(-10, False) + # WHEN + result = icclim.index( + in_files={"tmax": tmax, "tmin": tmin}, + index_name="data", + user_index=dict( + calc_operation="nb_events", + thresh=[12, -20], + var_type=USER_INDEX_TEMPERATURE_STAMP, + logical_operation=[OperatorRegistry.GREATER, OperatorRegistry.EQUAL], + link_logical_operations="or", + ), + slice_mode="month", + ) + # THEN + assert result.data[0] == 1 + + def test_multi_threshold_and(self): + # GIVEN + tmax = stub_tas(10, False) + tmax[1] = 15 + tmin = stub_tas(-10, False) + tmin[1] = -20 + # WHEN + result = icclim.index( + in_files={"tmax": tmax, "tmin": tmin}, + index_name="data", + user_index=dict( + calc_operation="nb_events", + thresh=[12, -20], + var_type=USER_INDEX_TEMPERATURE_STAMP, + logical_operation=[OperatorRegistry.GREATER, OperatorRegistry.EQUAL], + link_logical_operations="and", + ), + slice_mode="month", + ) + # THEN + assert result.data[0] == 1 + + +class Test_run_mean: + def test_run_mean_min(self): + # GIVEN + tmax = stub_tas(10, False) + tmax[30] = 0 + tmax[29] = 0 + tmax[28] = 0 + tmax[27] = 0 + tmax[26] = 0 + # WHEN + result = icclim.index( + in_files={"tmax": tmax}, + index_name="data", + user_index=dict( + calc_operation="run_mean", + extreme_mode="min", + window_width=5, + ), + slice_mode="month", + ) + # THEN + assert result.data[0] == 0 + assert result.data[1] == 2 + assert result.data[2] == 10 + + def test_run_mean_max(self): + # GIVEN + tmax = stub_tas(10, False) + tmax[30] = 20 + # WHEN + result = icclim.index( + in_files={"tmax": tmax}, 
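            # Note: the window is now the top-level `rolling_window_width`
            # argument just below; the old `window_width` key of `user_index`
            # is deprecated (the extractor tool pops it as such further down
            # in this diff).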
+            index_name="data",
+            rolling_window_width=2,
+            user_index=dict(
+                calc_operation="run_mean",
+                extreme_mode="max",
+            ),
+            slice_mode="month",
+        )
+        # THEN
+        assert result.data[0] == 15
+        assert result.data[1] == 15
+        assert result.data[2] == 10
+
+
+class Test_run_sum:
+    def test_run_sum_min(self):
+        # GIVEN
+        tmax = stub_tas(10, False)
+        tmax[30] = 0
+        tmax[29] = 0
+        tmax[28] = 0
+        tmax[27] = 0
+        tmax[26] = 0
+        # WHEN
+        result = icclim.index(
+            in_files={"tmax": tmax},
+            index_name="data",
+            rolling_window_width=5,
+            user_index=dict(
+                calc_operation="run_sum",
+                extreme_mode="min",
+            ),
+            slice_mode="month",
+        )
+        # THEN
+        assert result.data[0] == 0
+        assert result.data[1] == 10
+        assert result.data[2] == 50
+
+    def test_run_sum_max(self):
+        # GIVEN
+        tmax = stub_tas(10, False)
+        tmax[30] = 20
+        # WHEN
+        result = icclim.index(
+            in_files={"tmax": tmax},
+            index_name="data",
+            rolling_window_width=2,
+            user_index=dict(
+                calc_operation="run_sum",
+                extreme_mode="max",
+            ),
+            slice_mode="month",
+        )
+        # THEN
+        assert result.data[0] == 30
+        assert result.data[1] == 30
+        assert result.data[2] == 20
+
+
+class Test_max_consecutive_event_count:
+    def test_simple(self):
+        # GIVEN
+        tmax = stub_tas(10, False)
+        tmax[30] = 15  # On 31st January
+        # WHEN
+        result = icclim.index(
+            in_files={"tmax": tmax},
+            index_name="data",
+            user_index=dict(
+                calc_operation="max_nb_consecutive_events",
+                thresh=10.0,
+                logical_operation=OperatorRegistry.EQUAL,
+            ),
+            slice_mode="year",
+        )
+        # THEN
+        assert result.data[0] == 1795
+        assert result.data[1].isnull()
+
+
+class Test_anomaly:
+    def test_simple(self):
+        # GIVEN
+        tmax = stub_tas(10, False)
+        tmax2 = stub_tas(11, False)
+        # WHEN
+        result = icclim.index(
+            in_files={"tmax2": tmax2, "tmax": tmax},
+            index_name="data",
+            user_index=dict(
+                calc_operation="anomaly",
+            ),
+            slice_mode="year",
+        )
+        # THEN
+        assert (result.data == 1).all()
+        assert result.data.attrs[UNITS_ATTRIBUTE_KEY] == tmax.attrs[UNITS_ATTRIBUTE_KEY]
+
+    def test_single_var(self):
+        # GIVEN
+        tmax = stub_tas(10, False)
+        first_year = tmax.time.dt.year.min().values[()]
+        tmax = tmax.where(tmax.time.dt.year <= first_year + 1, 11)
+        ref = tmax.sel(time=slice(str(first_year), str(first_year + 1)))
+        ref_bds = build_climatology_bounds(ref)
+        study = tmax.where(~tmax.time.dt.year.isin(ref.time.dt.year), drop=True)
+        study_bds = build_climatology_bounds(study)
+        # WHEN
+        result = icclim.index(
+            in_files={"tmax": tmax},
+            time_range=study_bds,
+            base_period_time_range=ref_bds,
+            index_name="data",
+            sampling_method="groupby",
+            user_index=dict(
+                calc_operation="anomaly",
+            ),
+            slice_mode="month",
+        )
+        # THEN
+        assert (result.data == 1).all()
+        assert len(result.data.month) == 12
+        assert result.data.attrs[UNITS_ATTRIBUTE_KEY] == tmax.attrs[UNITS_ATTRIBUTE_KEY]
+
+    def test_simple_percent(self):
+        # GIVEN
+        tmax = stub_tas(10, False)
+        tmax2 = stub_tas(11, False)
+        # WHEN
+        result = icclim.index(
+            index_name="data",
+            in_files={"tmax2": tmax2, "tmax": tmax},
+            out_unit="%",
+            user_index=dict(calc_operation="anomaly"),
+        )
+        # THEN
+        assert (result.data == 10).all()
+        assert result.data.attrs[UNITS_ATTRIBUTE_KEY] == "%"
diff --git a/icclim/tests/test_user_indices.py b/icclim/tests/test_user_indices.py
deleted file mode 100644
index cc06b69a..00000000
--- a/icclim/tests/test_user_indices.py
+++ /dev/null
@@ -1,31 +0,0 @@
-from __future__ import annotations
-
-import pytest
-
-from icclim.models.frequency import Frequency
-from icclim.models.index_config import CfVariable
-from
icclim.models.user_index_config import LogicalOperation, UserIndexConfig -from icclim.tests.testing_utils import stub_tas - - -class Test_UserindexConfig: - @pytest.mark.parametrize("use_dask", [True, False]) - def test_simple_from_dict(self, use_dask): - dico = { - "index_name": "my_index", - "calc_operation": "min", - "logical_operation": "gt", - "thresh": 0 + 273.15, - "date_event": True, - } - tas = stub_tas(use_dask=use_dask) - config = UserIndexConfig( - **dico, freq=Frequency.MONTH, cf_vars=[CfVariable("tas", tas, tas)] - ) - assert config.index_name == "my_index" - assert config.calc_operation == "min" - assert config.logical_operation == LogicalOperation.GREATER_THAN - assert config.thresh == 273.15 - assert config.date_event - assert config.freq == Frequency.MONTH - assert config.cf_vars[0].study_da is tas diff --git a/icclim/tests/testing_utils.py b/icclim/tests/testing_utils.py index 75916aad..52232915 100644 --- a/icclim/tests/testing_utils.py +++ b/icclim/tests/testing_utils.py @@ -5,9 +5,7 @@ import xarray import xarray as xr -from icclim.models.frequency import Frequency -from icclim.models.index_config import CfVariable -from icclim.models.user_index_config import UserIndexConfig +from icclim.models.constants import UNITS_ATTRIBUTE_KEY VALUE_COUNT = 365 * 5 + 1 # 5 years of data (with 1 leap year) COORDS = dict( @@ -20,18 +18,12 @@ CF_TIME_RANGE = xr.cftime_range("2042-01-01", periods=VALUE_COUNT, freq="D") -def stub_user_index(cf_vars: list[CfVariable]): - return UserIndexConfig( - index_name="Yolo", calc_operation="noop", freq=Frequency.MONTH, cf_vars=cf_vars - ) - - def stub_tas(tas_value: float = 1.0, use_dask=False, use_cftime=False): da = xarray.DataArray( data=(np.full(VALUE_COUNT, tas_value).reshape((VALUE_COUNT, 1, 1))), dims=["time", "lat", "lon"], coords=COORDS, - attrs={"units": "K"}, + attrs={UNITS_ATTRIBUTE_KEY: "K"}, ) if use_cftime: da["time"] = CF_TIME_RANGE @@ -46,7 +38,7 @@ def stub_pr(value: float, use_dask=False): coords=COORDS, dims=["time", "lat", "lon"], name="pr", - attrs={"units": "kg m-2 d-1"}, + attrs={UNITS_ATTRIBUTE_KEY: "kg m-2 d-1"}, ) if use_dask: da.chunk() diff --git a/icclim/user_indices/__init__.py b/icclim/user_indices/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/icclim/user_indices/calc_operation.py b/icclim/user_indices/calc_operation.py index 81e8de3a..660f75b9 100644 --- a/icclim/user_indices/calc_operation.py +++ b/icclim/user_indices/calc_operation.py @@ -1,15 +1,11 @@ from __future__ import annotations -from enum import Enum -from typing import Callable, Literal +import dataclasses +from typing import Hashable, Literal -from xarray.core.dataarray import DataArray +from icclim.models.registry import Registry -from icclim.icclim_exceptions import InvalidIcclimArgumentError -from icclim.models.user_index_config import UserIndexConfig -from icclim.user_indices import operators - -CalcOperationLiteral = Literal[ +CalcOperationLike = Literal[ "max", "min", "sum", @@ -22,201 +18,25 @@ ] -def compute_user_index(config: UserIndexConfig) -> DataArray: - operation = CalcOperation.lookup(config.calc_operation) - return operation.compute_fun(config) - - -def anomaly(config: UserIndexConfig): - if ( - config.cf_vars[0].reference_da is None - or len(config.cf_vars[0].reference_da) == 0 - ): - raise InvalidIcclimArgumentError( - f"You must provide a `ref_time_range` in user_index dictionary to compute" - f" {CalcOperation.ANOMALY.value}." - f" To be valid, it must be within the dataset time range." 
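        # In the new entry point this requirement is expressed through the
        # top-level `base_period_time_range` argument of icclim.index, as
        # exercised by Test_anomaly.test_single_var in the new
        # test_user_index.py above.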
- ) - return operators.anomaly( - da=config.cf_vars[0].study_da, - da_ref=config.cf_vars[0].reference_da, - percent=config.is_percent, - ) - - -def run_sum(config: UserIndexConfig): - if config.extreme_mode is None or config.window_width is None: - raise InvalidIcclimArgumentError( - "Please provide an extreme_mode and a window_width to user_index." - ) - return operators.run_sum( - da=config.cf_vars[0].study_da, - extreme_mode=config.extreme_mode, - window_width=config.window_width, - coef=config.coef, - freq=config.freq.pandas_freq, - date_event=config.date_event, - ) - - -def run_mean(config: UserIndexConfig): - if config.extreme_mode is None or config.window_width is None: - raise InvalidIcclimArgumentError( - "Please provide a extreme mode and a window width." - ) - return operators.run_mean( - da=config.cf_vars[0].study_da, - extreme_mode=config.extreme_mode, - window_width=config.window_width, - coef=config.coef, - freq=config.freq.pandas_freq, - date_event=config.date_event, - ) - - -def max_consecutive_event_count(config: UserIndexConfig): - if config.logical_operation is None or config.thresh is None: - raise InvalidIcclimArgumentError( - "Please provide a threshold and a logical operation." - ) - if isinstance(config.thresh, list): - raise InvalidIcclimArgumentError( - f"{CalcOperation.MAX_NUMBER_OF_CONSECUTIVE_EVENTS.value} " - f"does not support threshold list. Please provide a single threshold." - ) - return operators.max_consecutive_event_count( - da=config.cf_vars[0].study_da, - in_base_da=config.cf_vars[0].reference_da, - logical_operation=config.logical_operation, - threshold=config.thresh, - coef=config.coef, - freq=config.freq.pandas_freq, - date_event=config.date_event, - ) - - -def count_events(config: UserIndexConfig): - if config.nb_event_config is None: - raise InvalidIcclimArgumentError( - f"{CalcOperation.EVENT_COUNT.value} not properly configure." - f" Please provide a threshold and a logical operation." 
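        # A well-formed configuration supplies both pieces, e.g.
        # user_index=dict(calc_operation="nb_events", thresh=15,
        #                 logical_operation=OperatorRegistry.GREATER),
        # as the new test_user_index.py tests above demonstrate.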
- ) - return operators.count_events( - das=list(map(lambda x: x.study_da, config.cf_vars)), - in_base_das=list(map(lambda x: x.reference_da, config.cf_vars)), - logical_operation=config.nb_event_config.logical_operation, - link_logical_operations=config.nb_event_config.link_logical_operations, - thresholds=config.nb_event_config.thresholds, - coef=config.coef, - freq=config.freq.pandas_freq, - date_event=config.date_event, - ) - - -def sum(config: UserIndexConfig): - return operators.sum( - da=_check_and_get_da(config), - in_base_da=_check_and_get_in_base_da(config), - coef=config.coef, - logical_operation=config.logical_operation, - threshold=_check_and_get_simple_threshold(config.thresh), - freq=config.freq.pandas_freq, - ) - - -def mean(config: UserIndexConfig): - return operators.mean( - da=_check_and_get_da(config), - in_base_da=_check_and_get_in_base_da(config), - coef=config.coef, - logical_operation=config.logical_operation, - threshold=_check_and_get_simple_threshold(config.thresh), - freq=config.freq.pandas_freq, - ) - - -def min(config: UserIndexConfig): - return _simple_reducer(operators.min, config) - - -def max(config: UserIndexConfig): - return _simple_reducer(operators.max, config) - +@dataclasses.dataclass +class CalcOperation(Hashable): + name: str -def _simple_reducer(op: Callable, config: UserIndexConfig): - return op( - da=_check_and_get_da(config), - in_base_da=_check_and_get_in_base_da(config), - coef=config.coef, - logical_operation=config.logical_operation, - threshold=_check_and_get_simple_threshold(config.thresh), - freq=config.freq.pandas_freq, - date_event=config.date_event, - ) - - -def _check_and_get_simple_threshold( - thresh: None | str | float | int, -) -> None | str | float | int: - if ( - thresh is None - or isinstance(thresh, str) - or isinstance(thresh, float) - or isinstance(thresh, int) - ): - return thresh - else: - raise InvalidIcclimArgumentError( - "threshold type must be either None, " - "a string (for percentile) or a number." - ) - - -def _check_and_get_da(config: UserIndexConfig) -> DataArray: - if len(config.cf_vars) == 1: - return config.cf_vars[0].study_da - else: - raise InvalidIcclimArgumentError( - f"There must be exactly one variable for {config.calc_operation}." - ) + def __hash__(self): + return hash(self.name) -def _check_and_get_in_base_da(config: UserIndexConfig) -> DataArray | None: - if len(config.cf_vars) == 1: - return config.cf_vars[0].reference_da - else: - raise InvalidIcclimArgumentError( - f"There must be exactly one variable for {config.calc_operation}" - ) - - -class CalcOperation(Enum): - MAX = ("max", max) - MIN = ("min", min) - SUM = ("sum", sum) - MEAN = ("mean", mean) - EVENT_COUNT = ("nb_events", count_events) - MAX_NUMBER_OF_CONSECUTIVE_EVENTS = ( +class CalcOperationRegistry(Registry): + # todo remove class once deprecation is finished (v6.1 ?) 
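    # (The Registry base class from icclim.models.registry is not shown in
    # this diff; presumably it resolves user-supplied strings against the
    # items declared below, taking over from the deleted
    # CalcOperation.lookup() enum helper, e.g. something along the lines of
    #     CalcOperationRegistry.lookup("run_mean")  # hypothetical signature
    # returning CalcOperation("run_mean").)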
+ _item_class = CalcOperation + MAX = CalcOperation("max") + MIN = CalcOperation("min") + SUM = CalcOperation("sum") + MEAN = CalcOperation("mean") + EVENT_COUNT = CalcOperation("nb_events") + MAX_NUMBER_OF_CONSECUTIVE_EVENTS = CalcOperation( "max_nb_consecutive_events", - max_consecutive_event_count, ) - RUN_MEAN = ("run_mean", run_mean) - RUN_SUM = ("run_sum", run_sum) - ANOMALY = ("anomaly", anomaly) - - def __init__( - self, input_name: str, compute_fun: Callable[[UserIndexConfig], DataArray] - ): - self.input_name = input_name - self.compute_fun = compute_fun - - @staticmethod - def lookup(calc_operation: str): - if isinstance(calc_operation, CalcOperation): - return calc_operation - for calc_op in CalcOperation: - if calc_op.input_name.upper() == calc_operation.upper(): - return calc_op - raise InvalidIcclimArgumentError( - f"The calc_operation {calc_operation} is unknown." - ) + RUN_MEAN = CalcOperation("run_mean") + RUN_SUM = CalcOperation("run_sum") + ANOMALY = CalcOperation("anomaly") diff --git a/icclim/user_indices/operators.py b/icclim/user_indices/operators.py deleted file mode 100644 index 707fcdf5..00000000 --- a/icclim/user_indices/operators.py +++ /dev/null @@ -1,486 +0,0 @@ -from __future__ import annotations - -from functools import reduce -from typing import Callable -from warnings import warn - -import dask.array -import numpy as np -import xarray -from xarray.core.dataarray import DataArray -from xarray.core.rolling import DataArrayRolling -from xclim.core.bootstrapping import percentile_bootstrap -from xclim.core.calendar import percentile_doy, resample_doy -from xclim.core.units import convert_units_to, to_agg_units -from xclim.indices.run_length import longest_run - -from icclim.icclim_exceptions import InvalidIcclimArgumentError -from icclim.models.constants import ( - PERCENTILE_THRESHOLD_STAMP, - PERCENTILES_COORD, - PRECIPITATION, - TEMPERATURE, - WET_DAY_THRESHOLD, -) -from icclim.models.user_index_config import ( - ExtremeMode, - LinkLogicalOperation, - LogicalOperation, -) -from icclim.user_indices.stat import ( - get_first_occurrence_index, - get_longest_run_start_index, -) - -__all__ = [ - "max", - "min", - "sum", - "mean", - "count_events", - "max_consecutive_event_count", - "run_mean", - "run_sum", - "anomaly", -] - - -def max( - da: DataArray, - in_base_da: DataArray | None = None, - coef: float | None = None, - logical_operation: LogicalOperation | None = None, - threshold: str | float | int | None = None, - freq: str = "MS", - date_event: bool = False, - var_type: str | None = None, - save_percentile=False, -) -> DataArray: - result = _apply_coef(coef, da) - result = _filter_by_threshold( - result, - in_base_da, - logical_operation, - threshold, - freq, - var_type, - save_percentile, - ) - resampled = result.resample(time=freq) - if date_event: - return _reduce_with_date_event( - resampled, lambda x: x.argmax("time") # type:ignore - ) - else: - return resampled.max(dim="time") - - -def min( - da: DataArray, - in_base_da: DataArray | None = None, - coef: float = None, - logical_operation: LogicalOperation = None, - threshold: str | float | int | None = None, - freq: str = "MS", - date_event: bool = False, - var_type: str = None, - save_percentile=False, -) -> DataArray: - result = _apply_coef(coef, da) - result = _filter_by_threshold( - result, - in_base_da, - logical_operation, - threshold, - freq, - var_type, - save_percentile, - ) - resampled = result.resample(time=freq) - if date_event: - return _reduce_with_date_event( - resampled, lambda x: 
x.argmin("time") # type:ignore - ) - else: - return resampled.min(dim="time") - - -def sum( - da: DataArray, - in_base_da: DataArray | None = None, - coef: float = None, - logical_operation: LogicalOperation = None, - threshold: str | float | int | None = None, - var_type: str = None, - freq: str = "MS", - save_percentile=False, -) -> DataArray: - result = _apply_coef(coef, da) - result = _filter_by_threshold( - result, - in_base_da, - logical_operation, - threshold, - freq, - var_type, - save_percentile, - ) - return result.resample(time=freq).sum(dim="time") - - -def mean( - da: DataArray, - in_base_da: DataArray | None = None, - coef: float = None, - logical_operation: LogicalOperation = None, - threshold: str | float | int | None = None, - var_type: str = None, - freq: str = "MS", - save_percentile=False, -) -> DataArray: - result = _apply_coef(coef, da) - result = _filter_by_threshold( - result, - in_base_da, - logical_operation, - threshold, - freq, - var_type, - save_percentile, - ) - return result.resample(time=freq).mean(dim="time") - - -def count_events( - logical_operation: list[LogicalOperation], - thresholds: list[float | str], - das: list[DataArray], - in_base_das: list[DataArray | None], - link_logical_operations: LinkLogicalOperation = None, - coef: float = None, - var_type: str = None, - freq: str = "MS", - date_event: bool = False, - save_percentile: bool = False, -) -> DataArray: - percentiles = [] - for i, threshold in enumerate(thresholds): - if isinstance(threshold, str) and len(in_base_das) > 0: - in_base_da = in_base_das[i] - if in_base_da is not None: - percentiles.append(_get_percentiles(threshold, var_type, in_base_da)) - acc = [] - for i, da in enumerate(das): - result = _apply_coef(coef, da) - if len(percentiles) > 0: - result = _threshold_compare_on_percentiles( - da=result, - percentiles=percentiles[i], - logical_operation=logical_operation[i], - freq=freq, - bootstrap=_is_bootstrappable(var_type), - ) - if save_percentile: - result.coords[f"percentile_{thresholds[i]}"] = resample_doy( - percentiles[i], result - ) - else: - result = logical_operation[i].compute(result, thresholds[i]) # type:ignore - acc.append(result) - if len(acc) == 1: - result = acc[0] - elif link_logical_operations == LinkLogicalOperation.AND_STAMP: - result = reduce(np.logical_and, acc, True) # type:ignore - elif link_logical_operations == LinkLogicalOperation.OR_STAMP: - result = reduce(np.logical_or, acc, False) # type:ignore - else: - raise NotImplementedError() - resampled = result.resample(time=freq) - if date_event: - return _get_count_events_date_event(resampled) - return resampled.sum(dim="time") - - -def max_consecutive_event_count( - da: DataArray, - logical_operation: LogicalOperation, - in_base_da: DataArray | None = None, - threshold: str | float | int | None = None, - coef: float = None, - freq: str = "MS", - date_event: bool = False, - var_type: str | None = None, - save_percentile=False, -) -> DataArray: - result = _apply_coef(coef, da) - if in_base_da is not None and isinstance(threshold, str): - per = _get_percentiles(threshold, var_type, in_base_da) - result = _threshold_compare_on_percentiles( - da=da, - percentiles=per, - logical_operation=logical_operation, - freq=freq, - bootstrap=_is_bootstrappable(var_type), - ) - if save_percentile: - result.coords[PERCENTILES_COORD] = resample_doy(per, result) - elif isinstance(threshold, float) or isinstance(threshold, int): - result = logical_operation.compute(da, threshold) - resampled = result.resample(time=freq) - if not 
date_event: - return resampled.map(longest_run, dim="time") - acc: list[DataArray] = [] - for label, value in resampled: - run_length = longest_run(value, dim="time") - index = get_longest_run_start_index(value, dim="time") - start = value[index.astype(int)].time - time_shift = run_length * np.timedelta64(1, "D") - end = start + time_shift - coords = dict( - time=label, - lat=value.lat, - lon=value.lon, - event_date_start=start, - event_date_end=end, - ) - acc.append(DataArray(data=run_length, dims=["lat", "lon"], coords=coords)) - result = xarray.concat(acc, "time") - return to_agg_units(result, da, "count") - - -def run_mean( - da: DataArray, - extreme_mode: ExtremeMode, - window_width: int, - coef: float = None, - freq: str = "MS", - date_event: bool = False, -) -> DataArray: - return _run_aggregator( - da=da, - extreme_mode=extreme_mode, - window_width=window_width, - coef=coef, - freq=freq, - date_event=date_event, - aggregator=lambda da: da.mean(), - ) - - -def run_sum( - da: DataArray, - extreme_mode: ExtremeMode, - window_width: int, - coef: float = None, - freq: str = "MS", - date_event: bool = False, -) -> DataArray: - return _run_aggregator( - da=da, - extreme_mode=extreme_mode, - window_width=window_width, - coef=coef, - freq=freq, - date_event=date_event, - aggregator=lambda da: da.sum(), - ) - - -def anomaly(da_ref: DataArray, da: DataArray, percent: bool) -> DataArray: - ref_mean = da_ref.mean(dim="time") - result: DataArray = da.mean(dim="time") - ref_mean - result._copy_attrs_from(da_ref) - if percent: - result = result / ref_mean * 100 - result.attrs["units"] = "%" - return result - - -def _apply_coef(coef: float | None, da: DataArray) -> DataArray: - if coef is not None: - return da * coef - return da - - -def _filter_by_threshold( - da: DataArray, - in_base_da: DataArray | None, - logical_operation: LogicalOperation | None, - threshold: str | float | int | None, - freq: str, - var_type: str | None, - save_percentile: bool, -) -> DataArray: - if threshold is None and logical_operation is None: - return da - if isinstance(threshold, str): - if in_base_da is None: - raise NotImplementedError( - "When threshold type is str for percentiles, a in_base must be provided" - ) - per = _get_percentiles(threshold, var_type, in_base_da) - result = _filter_by_logical_op_on_percentile( - da=da, - percentiles=_get_percentiles(threshold, var_type, in_base_da), - logical_operation=logical_operation, - freq=freq, - bootstrap=_is_bootstrappable(var_type), - ) - if save_percentile: - result.coords[PERCENTILES_COORD] = resample_doy(per, result) - elif ( - isinstance(threshold, float) or isinstance(threshold, int) - ) and logical_operation is not None: - result = da.where(logical_operation.compute(da, threshold)) - else: - raise NotImplementedError( - "threshold type must be on of [str, int, float] and logical_operation must " - "a LogicalOperation instance" - ) - if len(result) == 0: - raise InvalidIcclimArgumentError( - f"The dataset has been emptied by filtering with " - f"{logical_operation.operator}{threshold}." 
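        # This guard fired when the comparison masked out every value, i.e.
        # the threshold filter matched nothing, failing fast here rather than
        # letting the later resample step raise a more cryptic error.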
- ) - return result - - -@percentile_bootstrap -def _filter_by_logical_op_on_percentile( - da: DataArray, - percentiles: DataArray | None, - logical_operation: LogicalOperation | None, - freq: str = "MS", # noqa # used by percentile_bootstrap - bootstrap: bool = False, # used by percentile_bootstrap -) -> DataArray: - if logical_operation is not None and percentiles is not None: - percentiles = resample_doy(percentiles, da) - mask = logical_operation.compute(da, percentiles) - result = da.where(mask, drop=True) - if bootstrap: - result = da.expand_dims(_bootstrap=result._bootstrap) - return result - return da - - -@percentile_bootstrap -def _threshold_compare_on_percentiles( - da: DataArray, - percentiles: DataArray, - logical_operation: LogicalOperation, - freq: str = "MS", # noqa # used by percentile_bootstrap - bootstrap: bool = False, # noqa # used by percentile_bootstrap -) -> DataArray: - percentiles = resample_doy(percentiles, da) - return logical_operation.compute(da, percentiles) - - -def _get_percentiles( - thresh: str, var_type: str | None, in_base_da: DataArray -) -> DataArray: - if thresh.find(PERCENTILE_THRESHOLD_STAMP) == -1: - raise InvalidIcclimArgumentError( - "Percentile threshold not properly formatted." - " Use p as a prefix or suffix of the value for example 90p or p90." - " For non percentile threshold use a float instead of a string" - ) - per = float(thresh.replace(PERCENTILE_THRESHOLD_STAMP, "")) - da_per = in_base_da - if var_type == PRECIPITATION: - da_per = convert_units_to(in_base_da, "mm/d") - da_per = da_per.where(da_per > WET_DAY_THRESHOLD, drop=True) - percentiles = percentile_doy(arr=da_per, per=per).sel(percentiles=per) - return percentiles - - -def _run_aggregator( - da: DataArray, - extreme_mode: ExtremeMode, - window_width: int, - aggregator: Callable[[DataArrayRolling], DataArray], - coef: float = None, - freq: str = "MS", - date_event: bool = False, -) -> DataArray: - result = _apply_coef(coef, da) - result = result.rolling(time=window_width) - resampled = aggregator(result).resample(time=freq) - if extreme_mode == ExtremeMode.MIN: - if date_event: - return _reduce_with_date_event( - resampled, - lambda x: x.argmin("time"), # type:ignore - window=window_width, - ) - else: - return resampled.min(dim="time") - elif extreme_mode == ExtremeMode.MAX: - if date_event: - return _reduce_with_date_event( - resampled, - lambda x: x.argmax("time"), - window=window_width, - ) - else: - return resampled.max(dim="time") - else: - raise NotImplementedError() - - -def _reduce_with_date_event( - resampled: DataArray, - reducer: Callable[[DataArray], DataArray], - window: int | None = None, -) -> DataArray: - acc: list[DataArray] = [] - for label, value in resampled: - reduced_result = value.isel(time=reducer(value)) - if window is not None: - coords = dict( - time=label, - lat=value.lat, - lon=value.lon, - event_date_start=reduced_result.time, - event_date_end=reduced_result.time + np.timedelta64(window, "D"), - ) - else: - coords = dict( - time=label, - lat=value.lat, - lon=value.lon, - event_date=reduced_result.time, - ) - acc.append(DataArray(data=reduced_result, dims=["lat", "lon"], coords=coords)) - return xarray.concat(acc, "time") - - -def _get_count_events_date_event(resampled): - if isinstance(resampled, dask.array.Array): - warn("Computing event_date_start/end when using Dask arrays can be slow.") - acc: list[DataArray] = [] - for label, sample in resampled: - # Fixme probably not safe to compute on huge dataset, - # it should be fixed with - # 
https://github.com/pydata/xarray/issues/2511 - sample = sample.compute() - first = sample.isel(time=get_first_occurrence_index(sample)).time - value_reversed_time = sample[::-1, :, :] - last = sample.isel(time=get_first_occurrence_index(value_reversed_time)).time - acc.append( - DataArray( - data=sample.sum(dim="time"), - dims=["lat", "lon"], - coords=dict( - time=label, - lat=sample.lat, - lon=sample.lon, - event_date_start=first, - event_date_end=last, - ), - ) - ) - return xarray.concat(acc, "time") - - -def _is_bootstrappable(var_type): - return var_type == TEMPERATURE diff --git a/icclim/user_indices/stat.py b/icclim/user_indices/stat.py deleted file mode 100644 index de61b865..00000000 --- a/icclim/user_indices/stat.py +++ /dev/null @@ -1,44 +0,0 @@ -from __future__ import annotations - -from typing import Sequence - -import numpy as np -import xarray -from xarray import DataArray -from xclim.indices.run_length import rle_1d - - -def get_longest_run_start_index( - arr: DataArray, - window: int = 1, - dim: str = "time", -) -> DataArray: - return xarray.apply_ufunc( - get_index_of_longest_run, - arr, - input_core_dims=[[dim]], - kwargs={"window": window}, - vectorize=True, - dask="parallelized", - output_dtypes=[float], - ) - - -def get_index_of_longest_run(arr: Sequence[bool], window: int = 1) -> int: - values, rl, pos = rle_1d(arr) - if not np.any(values) or np.all(values * rl < window): # type:ignore - return 0 - index_of_max = np.nanargmax( - np.where(values * rl >= window, rl, np.NaN) # type:ignore - ) - return pos[index_of_max] # type:ignore - - -def get_first_occurrence_index(da: DataArray) -> DataArray: - """ - Return the index of the first True value in the 3D booleans array along - time dimension. - """ - stacked = da.stack(latlon=("lat", "lon")) - res = stacked.argmax("time") - return res.unstack() diff --git a/icclim/utils.py b/icclim/utils.py index c09bc1b7..6c543f7b 100644 --- a/icclim/utils.py +++ b/icclim/utils.py @@ -3,12 +3,13 @@ from datetime import datetime import dateparser -from xarray import DataArray, Dataset +import xarray +from xarray import Dataset from icclim.icclim_exceptions import InvalidIcclimArgumentError -def _da_chunksizes(da: DataArray) -> dict: +def _da_chunksizes(da: xarray.Variable) -> dict: # FIXME To remove once minimal xarray version is v0.20.0 (use .chunksizes instead) # Copied and adapted from xarray if hasattr(da.data, "chunks"): @@ -17,10 +18,10 @@ def _da_chunksizes(da: DataArray) -> dict: return {} -def _get_chunksizes(ds: Dataset) -> dict: +def get_chunksizes(ds: Dataset) -> dict: # FIXME To remove once minimal xarray version is v0.20.0 (use .chunksizes instead) # Copied and adapted from xarray - chunks = {} + chunks: dict[str, int] = {} for v in ds.variables.values(): if hasattr(v.data, "chunks"): for dim, c in _da_chunksizes(v).items(): @@ -33,13 +34,16 @@ def _get_chunksizes(ds: Dataset) -> dict: return chunks -def read_date(date_string: str) -> datetime: - error_msg = ( - "The date {} does not have a valid format." - " You can use various formats such as '2 december' or '02-12'." - ) - if (date := dateparser.parse(date_string)) is None: - raise InvalidIcclimArgumentError(error_msg.format(date_string)) +def read_date(in_date: str | datetime) -> datetime: + if isinstance(in_date, datetime): + return in_date + date = dateparser.parse(in_date) + if date is None: + raise InvalidIcclimArgumentError( + f"The date {in_date} does not have a valid format." + " You can use various formats such as '2 december', '02-12'," + " '1994-12-02'..." 
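            # dateparser accepts loose inputs: "1994-12-02" parses
            # unambiguously, while for "2 december" or "02-12" the missing
            # year and the day/month order are resolved from dateparser's
            # defaults and settings, so fully-specified ISO dates are the
            # safest choice.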
+ ) return date @@ -47,3 +51,9 @@ def get_date_to_iso_format(in_date: str | datetime) -> str: if isinstance(in_date, str): in_date = read_date(in_date) return in_date.strftime("%Y-%m-%d") + + +def is_number_sequence(values) -> bool: + return isinstance(values, (tuple, list)) and all( + map(lambda x: isinstance(x, (float, int)), values) + ) diff --git a/requirements.txt b/requirements.txt index 352299b9..4dbf7e98 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ dask[array] dateparser distributed fsspec +jinja2 netCDF4~=1.5.7 numpy pandas diff --git a/tools/extract-icclim-funs.py b/tools/extract-icclim-funs.py index b7a9a378..9157cb87 100644 --- a/tools/extract-icclim-funs.py +++ b/tools/extract-icclim-funs.py @@ -21,13 +21,10 @@ from pathlib import Path import icclim -from icclim.ecad.ecad_indices import EcadIndex -from icclim.models.constants import ( - MODIFIABLE_QUANTILE_WINDOW, - MODIFIABLE_THRESHOLD, - MODIFIABLE_UNIT, - QUANTILE_BASED, -) +from icclim.ecad.ecad_indices import EcadIndexRegistry +from icclim.models.constants import QUANTILE_BASED +from icclim.models.standard_index import StandardIndex +from icclim.models.threshold import Threshold ICCLIM_MANDATORY_FIELDS = ["in_files", "index_name"] # Note: callback args are not included below @@ -44,13 +41,9 @@ "base_period_time_range", "only_leap_years", "interpolation", - "save_percentile", + "save_thresholds", ] -MODIFIABLE_QUANTILE_WINDOW_FIELD = "window_width" -MODIFIABLE_THRESHOLD_FIELD = "threshold" -MODIFIABLE_UNIT_FIELD = "out_unit" - TAB = " " END_NOTE = """ @@ -60,38 +53,42 @@ """ -DEFAULT_OUTPUT_PATH = ( - Path(os.path.dirname(os.path.abspath(__file__))) / "icclim_wrapped.py" -) +DEFAULT_OUTPUT_PATH = Path(os.path.dirname(os.path.abspath(__file__))) / "pouet.py" -def run(file_path): - with open(file_path, "w") as f: +def run(path): + with open(path, "w") as f: acc = '''""" This module has been auto-generated. To modify these, edit the extractor tool in `tools/extract-icclim-funs.py`. This module exposes each climate index as individual functions for convenience. 
""" +# flake8: noqa E501 from __future__ import annotations import datetime +from typing import Sequence from xarray.core.dataset import Dataset import icclim from icclim.icclim_logger import Verbosity -from icclim.models.frequency import Frequency, SliceMode +from icclim.icclim_types import InFileLike, SamplingMethodLike +from icclim.models.frequency import Frequency, FrequencyLike from icclim.models.netcdf_version import NetcdfVersion from icclim.models.quantile_interpolation import QuantileInterpolation +from icclim.models.threshold import Threshold from icclim.models.user_index_dict import UserIndexDict -from icclim.pre_processing.input_parsing import InFileType __all__ = [ ''' - acc += ",\n".join(list(map(lambda x: f'{TAB}"{x.name.lower()}"', EcadIndex))) + ecad_indices = EcadIndexRegistry.values() + acc += ",\n".join( + list(map(lambda x: f'{TAB}"{x.short_name.lower()}"', ecad_indices)) + ) acc += f',\n{TAB}"custom_index",\n]\n' - for index in EcadIndex: - acc += get_ecad_index_declaration(index) + for index in ecad_indices: + acc += get_standard_index_declaration(index) acc += get_user_index_declaration() f.write(acc) @@ -103,16 +100,15 @@ def get_user_index_declaration() -> str: pop_args.append("indice_name") pop_args.append("user_indice") pop_args.append("transfer_limit_Mbytes") + pop_args.append("save_percentile") + pop_args.append("window_width") # Pop unnecessary args pop_args.append("callback") pop_args.append("callback_percentage_start_value") pop_args.append("callback_percentage_total") pop_args.append("index_name") pop_args.append("threshold") - pop_args.append("window_width") - # Pop not implemented yet - pop_args.append("interpolation") - # Pop manually added arg + # Pop manually added args pop_args.append("user_index") # for `custom_index`, user_index is mandatory for pop_arg in pop_args: icclim_index_args.pop(pop_arg) @@ -151,36 +147,53 @@ def build_fun_signature_args(args) -> str: return f"\n{TAB}" + f",\n{TAB}".join(map(get_parameter_declaration, args.values())) -def get_ecad_index_declaration(index: EcadIndex) -> str: +def get_standard_index_declaration(index: StandardIndex) -> str: icclim_index_args = dict(inspect.signature(icclim.index).parameters) pop_args = [] # Pop deprecated args pop_args.append("indice_name") pop_args.append("user_indice") pop_args.append("transfer_limit_Mbytes") + pop_args.append("save_percentile") + pop_args.append("window_width") # Pop unnecessary args pop_args.append("user_index") pop_args.append("callback") pop_args.append("callback_percentage_start_value") pop_args.append("callback_percentage_total") - pop_args.append("index_name") # specified with function name + # index_name -> specified with function name + pop_args.append("index_name") + # threshold; + # popped because not configurable on StandardIndices + # (ECAD requires specific thresholds) + pop_args.append("threshold") + # out_unit; + # popped because not configurable on StandardIndices + # (ECAD requires specific untis) + pop_args.append("out_unit") + # doy_window_width -> doy per window; + # popped because not configurable on StandardIndices + # (ECAD requires 5) + pop_args.append("doy_window_width") + # rolling_window_width; popped because no standard index rely on rolling window + pop_args.append("rolling_window_width") + # min_spell_length + # -> min spell length to be taken into account for `sum_of_spell_length` indices; + # popped because not configurable on StandardIndices (ECAD requires 6) + pop_args.append("min_spell_length") + # pop not implemented yet args + 
pop_args.append("sampling_method") + qualifiers = [] if index.qualifiers is None else index.qualifiers - if QUANTILE_BASED not in qualifiers: + is_per_based = QUANTILE_BASED in qualifiers + if not is_per_based: for arg in QUANTILE_INDEX_FIELDS: pop_args.append(arg) - if MODIFIABLE_QUANTILE_WINDOW not in qualifiers: - pop_args.append(MODIFIABLE_QUANTILE_WINDOW_FIELD) - if MODIFIABLE_THRESHOLD not in qualifiers: - pop_args.append(MODIFIABLE_THRESHOLD_FIELD) - if MODIFIABLE_UNIT not in qualifiers: - pop_args.append(MODIFIABLE_UNIT_FIELD) - for pop_arg in pop_args: icclim_index_args.pop(pop_arg) - # TODO replace these concatenation mess with a proper template (jinja or similar)... fun_signature_args = build_fun_signature_args(icclim_index_args) fun_signature = ( - f"\n\ndef {index.name.lower()}({fun_signature_args},\n) -> Dataset:\n" + f"\n\ndef {index.short_name.lower()}({fun_signature_args},\n) -> Dataset:\n" ) args_docs = get_params_docstring( list(icclim_index_args.keys()), icclim.index.__doc__ @@ -193,10 +206,22 @@ def get_ecad_index_declaration(index: EcadIndex) -> str: f"{END_NOTE}" f'{TAB}"""\n' ) - index_name_arg = f'\n{TAB}{TAB}index_name="{index.name}",\n{TAB}{TAB}' + index_name_arg = f'\n{TAB}{TAB}index_name="{index.short_name.upper()}",\n{TAB}{TAB}' + fun_call_args = index_name_arg + f",\n{TAB}{TAB}".join( [a + "=" + a for a in icclim_index_args] ) + if isinstance(index.threshold, (str, Threshold)): + fun_call_args += ( + f",\n{TAB}{TAB}threshold={format_thresh(index.threshold, is_per_based)}" + ) + elif isinstance(index.threshold, (list, tuple)): + fun_call_args += f",\n{TAB}{TAB}threshold=[" + for t in index.threshold: + fun_call_args += format_thresh(t, is_per_based) + "," + fun_call_args += "]" + if index.output_unit is not None: + fun_call_args += f',\n{TAB}{TAB}out_unit="{index.output_unit}"' fun_call = f"{TAB}return icclim.index({fun_call_args},\n{TAB})\n" return f"{fun_signature}{docstring}{fun_call}" @@ -218,16 +243,35 @@ def get_parameter_declaration(param: inspect.Parameter) -> str: def get_params_docstring(args: list[str], index_docstring: str) -> str: result = f"{TAB}Parameters\n{TAB}----------\n" - matches = list(re.compile(".+ : .*").finditer(index_docstring)) + args_declaration = list(re.compile(r"\n\s{4}\w+.*: .*").finditer(index_docstring)) for arg in args: - for i in range(0, len(matches) - 2): - if matches[i].group().strip().startswith(arg): - result += index_docstring[matches[i].start() : matches[i + 1].start()] - if matches[-1].group().strip().startswith(arg): - result += index_docstring[matches[-1].start() :] + for i in range(0, len(args_declaration) - 2): + if args_declaration[i].group().strip().startswith(arg): + result += index_docstring[ + args_declaration[i].start() : args_declaration[i + 1].start() + ] + if args_declaration[-1].group().strip().startswith(arg): + # Add everything after the last argument + result += index_docstring[args_declaration[-1].start() :] return result +def format_thresh(t: str | Threshold, is_percentile_based: bool) -> str: + if isinstance(t, str): + t = Threshold(t) + params = f'{TAB}{TAB}{TAB}query="{t.initial_query}",\n' + if is_percentile_based: + params += ( + f"{TAB}{TAB}{TAB}doy_window_width={t.doy_window_width},\n" + f"{TAB}{TAB}{TAB}only_leap_years=only_leap_years,\n" + f"{TAB}{TAB}{TAB}interpolation=interpolation,\n" + f"{TAB}{TAB}{TAB}reference_period=base_period_time_range,\n" + ) + if t.threshold_min_value is not None: + params += f'{TAB}{TAB}{TAB}threshold_min_value="{t.threshold_min_value.initial_query}",\n' + 
return f"{TAB}{TAB}{TAB}Threshold({params})" + + if __name__ == "__main__": file_path = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_OUTPUT_PATH run(file_path)