diff --git a/CHANGES.rst b/CHANGES.rst index e32cfa915..5da808d05 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -22,12 +22,15 @@ New features and enhancements * Added new function ``xclim.sdba.properties.std`` to calculate the standard deviation of a variable over all years at a given time resolution. (:pull:`1445`). * Amended the documentation of ``xclim.sdba.properties.trend`` to document already existing functionality of calculating the return values of ``scipy.stats.linregress``. (:pull:`1445`). * Add support for setting optional variables through the `ds` argument. (:issue:`1432`, :pull:`1435`). +* New ``xclim.core.calendar.is_offset_divisor`` to test if a given freq divides another one evenly (:pull:`1446`). +* Missing value objects now support input timeseries of quarterly and yearly frequencies (:pull:`1446`). +* Missing value checks enabled for all "generic" indicators (``return_level``, ``fit`` and ``stats``) (:pull:`1446`). Bug fixes ^^^^^^^^^ -* Fix `kldiv` docstring so the math formula renders to HTML. (:issue:`1408`, :pull:`1409`). +* Fix ``kldiv`` docstring so the math formula renders to HTML. (:issue:`1408`, :pull:`1409`). * Fix the registry entries of "generic" indicators. (:issue:`1423`, :pull:`1424`). -* Fix `jetstream_metric_woollings` so it uses the `vertical` coordinate identified by `cf-xarray`, instead of `pressure`. (:issue:`1421`, :pull:`1422`). Add logic to handle coordinates in decreasing order, or for longitudes defined from 0-360 instead of -180 to 180. (:issue:`1429`, :pull:`1430`). +* Fix ``jetstream_metric_woollings`` so it uses the `vertical` coordinate identified by `cf-xarray`, instead of `pressure`. (:issue:`1421`, :pull:`1422`). Add logic to handle coordinates in decreasing order, or for longitudes defined from 0-360 instead of -180 to 180. (:issue:`1429`, :pull:`1430`). * Fix virtual indicator attribute assignment causing individual indicator's realm to be ignored. (:issue:`1425`, :pull:`1426`). * Fixes the `raise_flags` argument of ``xclim.core.dataflags.data_flags`` so that an Exception is only raised when some checkups fail (:issue:`1456`, :pull:`1457`). * Fix ``xclim.indices.generic.get_zones`` so that `bins` can be given as input without error. (:pull:`1455`). @@ -49,6 +52,7 @@ Internal changes * Added a helper module ``_finder`` in the notebooks folder so that the working directory can always be found, with redundancies in place to prevent scripts from failing if the helper file is not found. (:pull:`1449`). * Added a manual cache-cleaning workflow (based on `GitHub cache-cleaning example `_), triggered when a branch has been merged. (:pull:`1462`). * Added a workflow for posting updates to the xclim Mastodon account (using `cbrgm/mastodon-github-action `_, triggered when a new version is published. (:pull:`1462`). +* Refactor base indicator classes and fix misleading inheritance of ``return_level`` (:issue:`1263`, :pull:`1446`). Breaking changes ^^^^^^^^^^^^^^^^ diff --git a/tests/test_generic_indicators.py b/tests/test_generic_indicators.py index 8a45a5f2f..8e9cc60a9 100644 --- a/tests/test_generic_indicators.py +++ b/tests/test_generic_indicators.py @@ -11,6 +11,7 @@ def test_simple(self, pr_ndseries, random): ts = generic.stats(pr, freq="YS", op="max") p = generic.fit(ts, dist="gumbel_r") assert p.attrs["estimator"] == "Maximum likelihood" + assert "time" not in p.dims def test_nan(self, pr_series, random): r = random.random(22) @@ -18,7 +19,10 @@ def test_nan(self, pr_series, random): pr = pr_series(r) out = generic.fit(pr, dist="norm") - assert not np.isnan(out.values[0]) + assert np.isnan(out.values[0]) + with set_options(check_missing="skip"): + out = generic.fit(pr, dist="norm") + assert not np.isnan(out.values[0]) def test_ndim(self, pr_ndseries, random): pr = pr_ndseries(random.random((100, 1, 2))) @@ -28,6 +32,9 @@ def test_ndim(self, pr_ndseries, random): def test_options(self, q_series, random): q = q_series(random.random(19)) + out = generic.fit(q, dist="norm") + np.testing.assert_array_equal(out.isnull(), False) + with set_options(missing_options={"at_least_n": {"n": 10}}): out = generic.fit(q, dist="norm") np.testing.assert_array_equal(out.isnull(), False) @@ -87,8 +94,9 @@ def test_ndq(self, ndq_series): assert out.attrs["units"] == "m3 s-1" def test_missing(self, ndq_series): - a = ndq_series - a = ndq_series.where(~((a.time.dt.dayofyear == 5) * (a.time.dt.year == 1902))) + a = ndq_series.where( + ~((ndq_series.time.dt.dayofyear == 5) & (ndq_series.time.dt.year == 1902)) + ) assert a.shape == (5000, 2, 3) out = generic.stats(a, op="max", month=1) diff --git a/tests/test_missing.py b/tests/test_missing.py index d0e501f2b..86fccf6cf 100644 --- a/tests/test_missing.py +++ b/tests/test_missing.py @@ -31,6 +31,21 @@ def test_monthly_input(self, random): mb = missing.MissingBase(ts, freq="AS", src_timestep="M", season="JJA") assert mb.count == 3 + def test_seasonal_input(self, random): + """Creating array with 11 seasons.""" + n = 11 + time = xr.cftime_range(start="2002-04-01", periods=n, freq="QS-JAN") + ts = xr.DataArray(random.random(n), dims="time", coords={"time": time}) + mb = missing.MissingBase(ts, freq="YS", src_timestep="QS-JAN") + # Make sure count is 12, because we're requesting a YS freq. + np.testing.assert_array_equal(mb.count, [4, 4, 4, 1]) + + with pytest.raises( + NotImplementedError, + match="frequency that is not aligned with the source timestep.", + ): + missing.MissingBase(ts, freq="YS", src_timestep="QS-DEC") + class TestMissingAnyFills: def test_missing_days(self, tas_series): @@ -144,6 +159,13 @@ def test_hourly(self, pr_hr_series): out = missing.missing_any(pr, freq="MS") np.testing.assert_array_equal(out, [True, False, True]) + def test_seasonal(self, random): + n = 11 + time = xr.cftime_range(start="2002-01-01", periods=n, freq="QS-JAN") + ts = xr.DataArray(random.random(n), dims="time", coords={"time": time}) + out = missing.missing_any(ts, freq="YS") + np.testing.assert_array_equal(out, [False, False, True]) + class TestMissingWMO: def test_missing_days(self, tas_series): diff --git a/xclim/core/calendar.py b/xclim/core/calendar.py index c356aefed..a898877d2 100644 --- a/xclim/core/calendar.py +++ b/xclim/core/calendar.py @@ -39,6 +39,7 @@ "ensure_cftime_array", "get_calendar", "interp_calendar", + "is_offset_divisor", "max_doy", "parse_offset", "percentile_doy", @@ -841,6 +842,58 @@ def construct_offset(mult: int, base: str, start_anchored: bool, anchor: str | N ) +def is_offset_divisor(divisor: str, offset: str): + """Check that divisor is a divisor of offset. + + A frequency is a "divisor" of another if a whole number of periods of the + former fit within a single period of the latter. + + Parameters + ---------- + divisor: str + The divisor frequency. + offset: str + The large frequency. + + Returns + ------- + bool + + Examples + -------- + >>> is_offset_divisor("QS-Jan", "YS") + True + >>> is_offset_divisor("QS-DEC", "AS-JUL") + False + >>> is_offset_divisor("D", "M") + True + """ + if compare_offsets(divisor, ">", offset): + return False + # Reconstruct offsets anchored at the start of the period + # to have comparable quantities, also get "offset" objects + mA, bA, sA, aA = parse_offset(divisor) + offAs = pd.tseries.frequencies.to_offset(construct_offset(mA, bA, True, aA)) + + mB, bB, sB, aB = parse_offset(offset) + offBs = pd.tseries.frequencies.to_offset(construct_offset(mB, bB, True, aB)) + tB = pd.date_range("1970-01-01T00:00:00", freq=offBs, periods=13) + + if bA in "WDHTLUN" or bB in "WDHTLUN": + # Simple length comparison is sufficient for submonthly freqs + # In case one of bA or bB is > W, we test many to be sure. + tA = pd.date_range("1970-01-01T00:00:00", freq=offAs, periods=13) + return np.all( + (np.diff(tB)[:, np.newaxis] / np.diff(tA)[np.newaxis, :]) % 1 == 0 + ) + + # else, we test alignment with some real dates + # If both fall on offAs, then is means divisor is aligned with offset at those dates + # if N=13 is True, then it is always True + # As divisor <= offset, this means divisor is a "divisor" of offset. + return all(offAs.is_on_offset(d) for d in tB) + + def _interpolate_doy_calendar( source: xr.DataArray, doy_max: int, doy_min: int = 1 ) -> xr.DataArray: diff --git a/xclim/core/indicator.py b/xclim/core/indicator.py index ad1d7d3f8..eba93350f 100644 --- a/xclim/core/indicator.py +++ b/xclim/core/indicator.py @@ -1353,11 +1353,13 @@ def _show_deprecation_warning(self): ) -class ResamplingIndicator(Indicator): - """Indicator that performs a resampling computation. +class CheckMissingIndicator(Indicator): + """Class adding missing value checks to indicators. - Compared to the base Indicator, this adds the handling of missing data, - and the check of allowed periods. + This should not be used as-is, but subclassed by implementing the `_get_missing_freq` method. + This method will be called in `_postprocess` using the compute parameters as only argument. + It should return a freq string, the same as the output freq of the computed data. + It can also be "None" to indicator the full time axis has been reduced, or "False" to skip the missing checks. Parameters ---------- @@ -1366,24 +1368,10 @@ class ResamplingIndicator(Indicator): None, this will be determined by the global configuration (see `xclim.set_options`). Defaults to "from_context". missing_options : dict, optional Arguments to pass to the `missing` function. If None, this will be determined by the global configuration. - allowed_periods : Sequence[str], optional - A list of allowed periods, i.e. base parts of the `freq` parameter. For example, indicators meant to be - computed annually only will have `allowed_periods=["A"]`. `None` means "any period" or that the - indicator doesn't take a `freq` argument. """ missing = "from_context" missing_options: dict | None = None - allowed_periods: list[str] | None = None - - @classmethod - def _ensure_correct_parameters(cls, parameters): - if "freq" not in parameters: - raise ValueError( - "ResamplingIndicator require a 'freq' argument, use the base Indicator" - " class if your computation doesn't perform any resampling." - ) - return super()._ensure_correct_parameters(parameters) def __init__(self, **kwds): if self.missing == "from_context" and self.missing_options is not None: @@ -1399,23 +1387,6 @@ def __init__(self, **kwds): super().__init__(**kwds) - def _preprocess_and_checks(self, das, params): - """Perform parent's checks and also check if freq is allowed.""" - das, params = super()._preprocess_and_checks(das, params) - - # Check if the period is allowed: - if ( - self.allowed_periods is not None - and parse_offset(params["freq"])[1] not in self.allowed_periods - ): - raise ValueError( - f"Resampling frequency {params['freq']} is not allowed for indicator " - f"{self.identifier} (needs something equivalent to one " - f"of {self.allowed_periods})." - ) - - return das, params - def _history_string(self, **kwargs): if self.missing == "from_context": missing = OPTIONS[CHECK_MISSING] @@ -1430,11 +1401,16 @@ def _history_string(self, **kwargs): return super()._history_string(**kwargs) + opt_str + def _get_missing_freq(self, params): + """Return the resampling frequency to be used in the missing values check.""" + raise NotImplementedError("Don't use `CheckMissingIndicator` directly.") + def _postprocess(self, outs, das, params): """Masking of missing values.""" outs = super()._postprocess(outs, das, params) - if self.missing != "skip": + freq = self._get_missing_freq(params) + if self.missing != "skip" or freq is False: # Mask results that do not meet criteria defined by the `missing` method. # This means all outputs must have the same dimensions as the broadcasted inputs (excluding time) options = self.missing_options or OPTIONS[MISSING_OPTIONS].get( @@ -1444,24 +1420,96 @@ def _postprocess(self, outs, das, params): # We flag periods according to the missing method. skip variables without a time coordinate. src_freq = self.src_freq if isinstance(self.src_freq, str) else None miss = ( - self._missing( - da, params["freq"], src_freq, options, params.get("indexer", {}) - ) + self._missing(da, freq, src_freq, options, params.get("indexer", {})) for da in das.values() if "time" in da.coords ) # Reduce by or and broadcast to ensure the same length in time # When indexing is used and there are no valid points in the last period, mask will not include it mask = reduce(np.logical_or, miss) - if isinstance(mask, DataArray) and mask.time.size < outs[0].time.size: + if ( + isinstance(mask, DataArray) + and "time" in mask.dims + and mask.time.size < outs[0].time.size + ): mask = mask.reindex(time=outs[0].time, fill_value=True) outs = [out.where(~mask) for out in outs] return outs -class ResamplingIndicatorWithIndexing(ResamplingIndicator): - """Resampling indicator that also injects "indexer" kwargs to subset the inputs before computation.""" +class ReducingIndicator(CheckMissingIndicator): + """Indicator that performs a time-reducing computation. + + Compared to the base Indicator, this adds the handling of missing data. + + Parameters + ---------- + missing : {any, wmo, pct, at_least_n, skip, from_context} + The name of the missing value method. See `xclim.core.missing.MissingBase` to create new custom methods. If + None, this will be determined by the global configuration (see `xclim.set_options`). Defaults to "from_context". + missing_options : dict, optional + Arguments to pass to the `missing` function. If None, this will be determined by the global configuration. + """ + + def _get_missing_freq(self, params): + """Return None, to indicate that the full time axis is to be reduced.""" + return None + + +class ResamplingIndicator(CheckMissingIndicator): + """Indicator that performs a resampling computation. + + Compared to the base Indicator, this adds the handling of missing data, + and the check of allowed periods. + + Parameters + ---------- + missing : {any, wmo, pct, at_least_n, skip, from_context} + The name of the missing value method. See `xclim.core.missing.MissingBase` to create new custom methods. If + None, this will be determined by the global configuration (see `xclim.set_options`). Defaults to "from_context". + missing_options : dict, optional + Arguments to pass to the `missing` function. If None, this will be determined by the global configuration. + allowed_periods : Sequence[str], optional + A list of allowed periods, i.e. base parts of the `freq` parameter. For example, indicators meant to be + computed annually only will have `allowed_periods=["A"]`. `None` means "any period" or that the + indicator doesn't take a `freq` argument. + """ + + allowed_periods: list[str] | None = None + + @classmethod + def _ensure_correct_parameters(cls, parameters): + if "freq" not in parameters: + raise ValueError( + "ResamplingIndicator require a 'freq' argument, use the base Indicator" + " class if your computation doesn't perform any resampling." + ) + return super()._ensure_correct_parameters(parameters) + + def _get_missing_freq(self, params): + return params["freq"] + + def _preprocess_and_checks(self, das, params): + """Perform parent's checks and also check if freq is allowed.""" + das, params = super()._preprocess_and_checks(das, params) + + # Check if the period is allowed: + if ( + self.allowed_periods is not None + and parse_offset(params["freq"])[1] not in self.allowed_periods + ): + raise ValueError( + f"Resampling frequency {params['freq']} is not allowed for indicator " + f"{self.identifier} (needs something equivalent to one " + f"of {self.allowed_periods})." + ) + + return das, params + + +class IndexingIndicator(Indicator): + """Indicator that also injects "indexer" kwargs to subset the inputs before computation.""" @classmethod def _injected_parameters(cls): @@ -1490,6 +1538,12 @@ def _preprocess_and_checks(self, das: dict[str, DataArray], params: dict[str, An return das, params +class ResamplingIndicatorWithIndexing(ResamplingIndicator, IndexingIndicator): + """Resampling indicator that also injects "indexer" kwargs to subset the inputs before computation.""" + + pass + + class Daily(ResamplingIndicator): """Class for daily inputs and resampling computes.""" @@ -1503,6 +1557,8 @@ class Hourly(ResamplingIndicator): base_registry["Indicator"] = Indicator +base_registry["ReducingIndicator"] = ReducingIndicator +base_registry["IndexingIndicator"] = IndexingIndicator base_registry["ResamplingIndicator"] = ResamplingIndicator base_registry["ResamplingIndicatorWithIndexing"] = ResamplingIndicatorWithIndexing base_registry["Hourly"] = Hourly diff --git a/xclim/core/missing.py b/xclim/core/missing.py index 0e2e00923..66a627c5f 100644 --- a/xclim/core/missing.py +++ b/xclim/core/missing.py @@ -27,7 +27,13 @@ import numpy as np import xarray as xr -from .calendar import date_range, get_calendar, select_time +from .calendar import ( + date_range, + get_calendar, + is_offset_divisor, + parse_offset, + select_time, +) from .options import ( CHECK_MISSING, MISSING_METHODS, @@ -104,9 +110,9 @@ def prepare(self, da, freq, src_timestep, **indexer): da : xr.DataArray Input data. freq : str - Resampling frequency defining the periods defined in :ref:`timeseries.resampling`. - src_timestep : {"D", "H"} - Expected input frequency. + Resampling frequency, from the periods defined in :ref:`timeseries.resampling`. + src_timestep : str + Expected input frequency, from the periods defined in :ref:`timeseries.resampling`. \*\*indexer : {dim: indexer}, optional Time attribute and values over which to subset the array. For example, use season='DJF' to select winter values, month=1 to select January, or month=[6,7,8] to select summer months. If not indexer is given, @@ -141,7 +147,14 @@ def prepare(self, da, freq, src_timestep, **indexer): start_time = i[:1] end_time = i[-1:] - if indexer or "M" in src_timestep: + if freq is not None and not is_offset_divisor(src_timestep, freq): + raise NotImplementedError( + "Missing checks not implemented for timeseries resampled to a frequency that is not " + f"aligned with the source timestep. {src_timestep} is not a divisor of {freq}." + ) + + offset = parse_offset(src_timestep) + if indexer or offset[1] in "YAQM": # Create a full synthetic time series and compare the number of days with the original series. t = date_range( start_time[0], diff --git a/xclim/indicators/generic/_stats.py b/xclim/indicators/generic/_stats.py index ec932d415..d8d8c6555 100644 --- a/xclim/indicators/generic/_stats.py +++ b/xclim/indicators/generic/_stats.py @@ -1,6 +1,6 @@ from __future__ import annotations -from xclim.core.indicator import Indicator, ResamplingIndicator +from xclim.core.indicator import ReducingIndicator, ResamplingIndicator from xclim.indices.generic import select_resample_op from xclim.indices.stats import fit as _fit from xclim.indices.stats import frequency_analysis @@ -8,7 +8,7 @@ __all__ = ["fit", "return_level", "stats"] -class Generic(Indicator): +class Generic(ReducingIndicator): realm = "generic" @@ -30,7 +30,7 @@ class GenericResampling(ResamplingIndicator): ) -return_level = GenericResampling( +return_level = Generic( title="Return level from frequency analysis", identifier="return_level", var_name="fa_{window}{mode:r}{indexer}", @@ -40,7 +40,6 @@ class GenericResampling(ResamplingIndicator): abstract="Frequency analysis on the basis of a given mode and distribution.", compute=frequency_analysis, src_freq="D", - missing="skip", ) @@ -51,6 +50,5 @@ class GenericResampling(ResamplingIndicator): long_name="Daily statistics", description="{freq} {op} of daily values ({indexer}).", compute=select_resample_op, - missing="any", src_freq="D", )