diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst index a45f40d5cf..b6893a0d09 100644 --- a/docs/src/whatsnew/latest.rst +++ b/docs/src/whatsnew/latest.rst @@ -38,6 +38,13 @@ This document explains the changes made to Iris for this release and :class:`~iris.coord_systems.RotatedMercator` coordinate systems, complete with NetCDF loading and saving. (:pull:`5548`) +#. `@trexfeathers`_ added the ``use_year_at_season_start`` parameter to + :func:`iris.coord_categorisation.add_season_year`. When + ``use_year_at_season_start==True``: seasons spanning the year boundary (e.g. + Winter - December to February) will be assigned to the preceding year (e.g. + the year of December) instead of the following year (the default behaviour). + (:pull:`5573`) + 🐛 Bugs Fixed ============= diff --git a/lib/iris/coord_categorisation.py b/lib/iris/coord_categorisation.py index 698b4828f1..b6cc79f253 100644 --- a/lib/iris/coord_categorisation.py +++ b/lib/iris/coord_categorisation.py @@ -271,20 +271,33 @@ def _validate_seasons(seasons): return -def _month_year_adjusts(seasons): - """Compute the year adjustments required for each month. +def _month_year_adjusts(seasons, use_year_at_season_start=False): + """ + Compute the year adjustments required for each month. - These determine whether the month belongs to a season in the same - year or is in the start of a season that counts towards the next - year. + These adjustments ensure that no season spans two years by assigning months + to the **next** year (use_year_at_season_start is False) or the + **previous** year (use_year_at_season_start is True). E.g. Winter - djf: + either assign Dec to the next year, or Jan and Feb to the previous year. """ - month_year_adjusts = [None, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + # 1 'slot' for each month, with an extra leading 'slot' because months + # are 1-indexed - January is 1, therefore corresponding to the 2nd + # array index. + month_year_adjusts = np.zeros(13, dtype=int) + for season in seasons: - months = _months_in_season(season) - for month in months: - if month > months[-1]: - month_year_adjusts[month] = 1 + months = np.array(_months_in_season(season)) + if use_year_at_season_start: + months_to_shift = months < months[0] + year_shift = -1 + else: + # Sending forwards. + months_to_shift = months > months[-1] + year_shift = 1 + indices_to_shift = months[np.flatnonzero(months_to_shift)] + month_year_adjusts[indices_to_shift] = year_shift + return month_year_adjusts @@ -383,34 +396,40 @@ def _season_number(coord, value): def add_season_year( - cube, coord, name="season_year", seasons=("djf", "mam", "jja", "son") + cube, + coord, + name="season_year", + seasons=("djf", "mam", "jja", "son"), + use_year_at_season_start=False, ): """ - Add a categorical year-of-season coordinate, with user specified - seasons. - - Args: - - * cube (:class:`iris.cube.Cube`): - The cube containing 'coord'. The new coord will be added into - it. - * coord (:class:`iris.coords.Coord` or string): - Coordinate in 'cube', or its name, representing time. - - Kwargs: - - * name (string): - Name of the created coordinate. Defaults to "season_year". - * seasons (:class:`list` of strings): + Add a categorical year-of-season coordinate, with user specified seasons. + + Parameters + ---------- + cube : :class:`iris.cube.Cube` + The cube containing `coord`. The new coord will be added into it. + coord : :class:`iris.coords.Coord` or str + Coordinate in `cube`, or its name, representing time. + name : str, default="season_year" + Name of the created coordinate. + seasons : tuple of str, default=("djf", "mam", "jja", "son") List of seasons defined by month abbreviations. Each month must appear once and only once. Defaults to standard meteorological - seasons ('djf', 'mam', 'jja', 'son'). + seasons (``djf``, ``mam``, ``jja``, ``son``). + use_year_at_season_start: bool, default=False + Seasons spanning the year boundary (e.g. Winter ``djf``) will belong + fully to the following year by default (e.g. the year of Jan and Feb). + Set to ``True`` for spanning seasons to belong to the preceding + year (e.g. the year of Dec) instead. """ # Check that the seasons are valid. _validate_seasons(seasons) # Define the adjustments to be made to the year. - month_year_adjusts = _month_year_adjusts(seasons) + month_year_adjusts = _month_year_adjusts( + seasons, use_year_at_season_start=use_year_at_season_start + ) # Define a categorisation function. def _season_year(coord, value): diff --git a/lib/iris/tests/test_coord_categorisation.py b/lib/iris/tests/test_coord_categorisation.py deleted file mode 100644 index 0206ba66a5..0000000000 --- a/lib/iris/tests/test_coord_categorisation.py +++ /dev/null @@ -1,197 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the LGPL license. -# See COPYING and COPYING.LESSER in the root of the repository for full -# licensing details. -""" -Test the coordinate categorisation functions. -""" - -# import iris tests first so that some things can be initialised before importing anything else -import iris.tests as tests # isort:skip - -import warnings - -import cf_units -import numpy as np - -import iris -import iris.coord_categorisation as ccat - -CATEGORISATION_FUNCS = ( - ccat.add_day_of_month, - ccat.add_day_of_year, - ccat.add_weekday, - ccat.add_weekday_fullname, - ccat.add_weekday_number, - ccat.add_month, - ccat.add_month_fullname, - ccat.add_month_number, - ccat.add_year, - ccat.add_season, - ccat.add_season_number, - ccat.add_season_year, - ccat.add_season_membership, -) - - -class TestCategorisations(tests.IrisTest): - def setUp(self): - # make a series of 'day numbers' for the time, that slide across month - # boundaries - day_numbers = np.arange(0, 600, 27, dtype=np.int32) - - cube = iris.cube.Cube( - day_numbers, long_name="test cube", units="metres" - ) - - # use day numbers as data values also (don't actually use this for - # anything) - cube.data = day_numbers - - time_coord = iris.coords.DimCoord( - day_numbers, - standard_name="time", - units=cf_units.Unit("days since epoch", "standard"), - ) - cube.add_dim_coord(time_coord, 0) - - self.cube = cube - self.time_coord = time_coord - - def test_bad_coord(self): - for func in CATEGORISATION_FUNCS: - kwargs = {"name": "my_category"} - if func is ccat.add_season_membership: - kwargs["season"] = "djf" - with self.assertRaises(iris.exceptions.CoordinateNotFoundError): - func(self.cube, "DOES NOT EXIST", **kwargs) - - def test_explicit_result_names(self): - result_name = "my_category" - fmt = "Missing/incorrectly named result for {0!r}" - for func in CATEGORISATION_FUNCS: - # Specify source coordinate by name - cube = self.cube.copy() - kwargs = {"name": result_name} - if func is ccat.add_season_membership: - kwargs["season"] = "djf" - with warnings.catch_warnings(record=True): - func(cube, "time", **kwargs) - result_coords = cube.coords(result_name) - self.assertEqual(len(result_coords), 1, fmt.format(func.__name__)) - # Specify source coordinate by coordinate reference - cube = self.cube.copy() - time = cube.coord("time") - with warnings.catch_warnings(record=True): - func(cube, time, **kwargs) - result_coords = cube.coords(result_name) - self.assertEqual(len(result_coords), 1, fmt.format(func.__name__)) - - def test_basic(self): - cube = self.cube - time_coord = self.time_coord - - ccat.add_year(cube, time_coord, "my_year") - ccat.add_day_of_month(cube, time_coord, "my_day_of_month") - ccat.add_day_of_year(cube, time_coord, "my_day_of_year") - - ccat.add_month(cube, time_coord, "my_month") - ccat.add_month_fullname(cube, time_coord, "my_month_fullname") - ccat.add_month_number(cube, time_coord, "my_month_number") - - ccat.add_weekday(cube, time_coord, "my_weekday") - ccat.add_weekday_number(cube, time_coord, "my_weekday_number") - ccat.add_weekday_fullname(cube, time_coord, "my_weekday_fullname") - - ccat.add_season(cube, time_coord, "my_season") - ccat.add_season_number(cube, time_coord, "my_season_number") - ccat.add_season_year(cube, time_coord, "my_season_year") - - # also test 'generic' categorisation interface - def _month_in_quarter(coord, pt_value): - date = coord.units.num2date(pt_value) - return (date.month - 1) % 3 - - ccat.add_categorised_coord( - cube, "my_month_in_quarter", time_coord, _month_in_quarter - ) - - # To ensure consistent results between 32-bit and 64-bit - # platforms, ensure all the numeric categorisation coordinates - # are always stored as int64. - for coord in cube.coords(): - if coord.long_name is not None and coord.points.dtype.kind == "i": - coord.points = coord.points.astype(np.int64) - - # check values - self.assertCML(cube, ("categorisation", "quickcheck.cml")) - - def test_add_season_nonstandard(self): - # season categorisations work for non-standard seasons? - cube = self.cube - time_coord = self.time_coord - seasons = ["djfm", "amjj", "ason"] - ccat.add_season(cube, time_coord, name="seasons", seasons=seasons) - ccat.add_season_number( - cube, time_coord, name="season_numbers", seasons=seasons - ) - ccat.add_season_year( - cube, time_coord, name="season_years", seasons=seasons - ) - self.assertCML(cube, ("categorisation", "customcheck.cml")) - - def test_add_season_membership(self): - # season membership identifies correct seasons? - season = "djf" - ccat.add_season_membership(self.cube, "time", season, name="in_season") - ccat.add_season(self.cube, "time") - coord_season = self.cube.coord("season") - coord_membership = self.cube.coord("in_season") - season_locations = np.where(coord_season.points == season)[0] - membership_locations = np.where(coord_membership.points)[0] - self.assertArrayEqual(membership_locations, season_locations) - - def test_add_season_invalid_spec(self): - # custom seasons with an invalid season raises an error? - seasons = ("djf", "maj", "jja", "son") # MAJ not a season! - for func in ( - ccat.add_season, - ccat.add_season_year, - ccat.add_season_number, - ): - with self.assertRaises(ValueError): - func(self.cube, "time", name="my_category", seasons=seasons) - - def test_add_season_repeated_months(self): - # custom seasons with repeated months raises an error? - seasons = ("djfm", "mam", "jja", "son") - for func in ( - ccat.add_season, - ccat.add_season_year, - ccat.add_season_number, - ): - with self.assertRaises(ValueError): - func(self.cube, "time", name="my_category", seasons=seasons) - - def test_add_season_missing_months(self): - # custom seasons with missing months raises an error? - seasons = ("djfm", "amjj") - for func in ( - ccat.add_season, - ccat.add_season_year, - ccat.add_season_number, - ): - with self.assertRaises(ValueError): - func(self.cube, "time", name="my_category", seasons=seasons) - - def test_add_season_membership_invalid_spec(self): - season = "maj" # not a season! - with self.assertRaises(ValueError): - ccat.add_season_membership( - self.cube, "time", season, name="maj_season" - ) - - -if __name__ == "__main__": - tests.main() diff --git a/lib/iris/tests/unit/coord_categorisation/test_coord_categorisation.py b/lib/iris/tests/unit/coord_categorisation/test_coord_categorisation.py new file mode 100644 index 0000000000..fbc3514147 --- /dev/null +++ b/lib/iris/tests/unit/coord_categorisation/test_coord_categorisation.py @@ -0,0 +1,252 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the LGPL license. +# See COPYING and COPYING.LESSER in the root of the repository for full +# licensing details. +""" +Test the coordinate categorisation functions. +""" + +import warnings + +import cf_units +import numpy as np +import pytest + +import iris +import iris.coord_categorisation as ccat +import iris.coords +import iris.cube +import iris.exceptions +from iris.tests import IrisTest + + +@pytest.fixture( + scope="module", + params=( + ccat.add_day_of_month, + ccat.add_day_of_year, + ccat.add_weekday, + ccat.add_weekday_fullname, + ccat.add_weekday_number, + ccat.add_month, + ccat.add_month_fullname, + ccat.add_month_number, + ccat.add_year, + ccat.add_season, + ccat.add_season_number, + ccat.add_season_year, + ccat.add_season_membership, + ), +) +def categorisation_func(request): + return request.param + + +@pytest.fixture( + scope="module", + params=( + ccat.add_season, + ccat.add_season_number, + ccat.add_season_year, + ), +) +def season_cat_func(request): + return request.param + + +@pytest.fixture(scope="module") +def day_numbers(): + # make a series of 'day numbers' for the time, that slide across month + # boundaries + return np.arange(0, 600, 27, dtype=np.int32) + + +@pytest.fixture +def time_coord(day_numbers): + return iris.coords.DimCoord( + day_numbers, + standard_name="time", + units=cf_units.Unit("days since epoch", "standard"), + ) + + +@pytest.fixture +def cube(day_numbers, time_coord): + _cube = iris.cube.Cube(day_numbers, long_name="test cube", units="metres") + # use day numbers as data values also (don't actually use this for + # anything) + _cube.data = day_numbers + _cube.add_dim_coord(time_coord, 0) + return _cube + + +def test_bad_coord(cube, categorisation_func): + kwargs = {"name": "my_category"} + if categorisation_func is ccat.add_season_membership: + kwargs["season"] = "djf" + with pytest.raises(iris.exceptions.CoordinateNotFoundError): + categorisation_func(cube, "DOES NOT EXIST", **kwargs) + + +def test_explicit_result_names(cube, categorisation_func): + result_name = "my_category" + fmt = "Missing/incorrectly named result for {0!r}" + # Specify source coordinate by name + new_cube = cube.copy() + kwargs = {"name": result_name} + if categorisation_func is ccat.add_season_membership: + kwargs["season"] = "djf" + with warnings.catch_warnings(record=True): + categorisation_func(new_cube, "time", **kwargs) + result_coords = new_cube.coords(result_name) + assert len(result_coords) == 1, fmt.format(categorisation_func.__name__) + # Specify source coordinate by coordinate reference + new_cube = cube.copy() + time = new_cube.coord("time") + with warnings.catch_warnings(record=True): + categorisation_func(new_cube, time, **kwargs) + result_coords = new_cube.coords(result_name) + assert len(result_coords) == 1, fmt.format(categorisation_func.__name__) + + +def test_basic(cube, time_coord): + ccat.add_year(cube, time_coord, "my_year") + ccat.add_day_of_month(cube, time_coord, "my_day_of_month") + ccat.add_day_of_year(cube, time_coord, "my_day_of_year") + + ccat.add_month(cube, time_coord, "my_month") + ccat.add_month_fullname(cube, time_coord, "my_month_fullname") + ccat.add_month_number(cube, time_coord, "my_month_number") + + ccat.add_weekday(cube, time_coord, "my_weekday") + ccat.add_weekday_number(cube, time_coord, "my_weekday_number") + ccat.add_weekday_fullname(cube, time_coord, "my_weekday_fullname") + + ccat.add_season(cube, time_coord, "my_season") + ccat.add_season_number(cube, time_coord, "my_season_number") + ccat.add_season_year(cube, time_coord, "my_season_year") + + # also test 'generic' categorisation interface + def _month_in_quarter(coord, pt_value): + date = coord.units.num2date(pt_value) + return (date.month - 1) % 3 + + ccat.add_categorised_coord( + cube, "my_month_in_quarter", time_coord, _month_in_quarter + ) + + # To ensure consistent results between 32-bit and 64-bit + # platforms, ensure all the numeric categorisation coordinates + # are always stored as int64. + for coord in cube.coords(): + if coord.long_name is not None and coord.points.dtype.kind == "i": + coord.points = coord.points.astype(np.int64) + + # check values + IrisTest.assertCML(IrisTest(), cube, ("categorisation", "quickcheck.cml")) + + +def test_add_season_nonstandard(cube, time_coord): + # season categorisations work for non-standard seasons? + seasons = ["djfm", "amjj", "ason"] + ccat.add_season(cube, time_coord, name="seasons", seasons=seasons) + ccat.add_season_number( + cube, time_coord, name="season_numbers", seasons=seasons + ) + ccat.add_season_year( + cube, time_coord, name="season_years", seasons=seasons + ) + IrisTest.assertCML(IrisTest(), cube, ("categorisation", "customcheck.cml")) + + +@pytest.mark.parametrize("backwards", [None, False, True]) +@pytest.mark.parametrize( + "nonstandard", + [False, True], + ids=["standard_seasons", "nonstandard_seasons"], +) +def test_add_season_year(cube, time_coord, backwards, nonstandard): + """Specific test to account for the extra use_year_at_season_start argument.""" + + kwargs = dict( + cube=cube, + coord=time_coord, + name="season_years", + use_year_at_season_start=backwards, + ) + if nonstandard: + kwargs["seasons"] = ["ndjfm", "amjj", "aso"] + + # Based on the actual years of each date. + expected_years = np.array(([1970] * 14) + ([1971] * 9)) + # Subset to just the 'season' of interest. + season_slice = np.s_[12:17] + expected_years = expected_years[season_slice] + + # Single indices to examine to test the handling of specific months. + nov = 0 + dec = 1 + jan = 2 + feb = 3 + mar = 4 + + # Set the expected deviations from the actual date years. + if backwards is True: + expected_years[jan] = 1970 + expected_years[feb] = 1970 + if nonstandard: + expected_years[mar] = 1970 + else: + # Either False or None - False being the default behaviour. + expected_years[dec] = 1971 + if nonstandard: + expected_years[nov] = 1971 + + ccat.add_season_year(**kwargs) + actual_years = cube.coord(kwargs["name"]).points + # Subset to just the 'season' of interest. + actual_years = actual_years[season_slice] + + np.testing.assert_array_almost_equal(actual_years, expected_years) + + +def test_add_season_membership(cube): + # season membership identifies correct seasons? + season = "djf" + ccat.add_season_membership(cube, "time", season, name="in_season") + ccat.add_season(cube, "time") + coord_season = cube.coord("season") + coord_membership = cube.coord("in_season") + season_locations = np.where(coord_season.points == season)[0] + membership_locations = np.where(coord_membership.points)[0] + np.testing.assert_array_almost_equal( + membership_locations, season_locations + ) + + +def test_add_season_invalid_spec(cube, season_cat_func): + # custom seasons with an invalid season raises an error? + seasons = ("djf", "maj", "jja", "son") # MAJ not a season! + with pytest.raises(ValueError): + season_cat_func(cube, "time", name="my_category", seasons=seasons) + + +def test_add_season_repeated_months(cube, season_cat_func): + # custom seasons with repeated months raises an error? + seasons = ("djfm", "mam", "jja", "son") + with pytest.raises(ValueError): + season_cat_func(cube, "time", name="my_category", seasons=seasons) + + +def test_add_season_missing_months(cube, season_cat_func): + # custom seasons with missing months raises an error? + seasons = ("djfm", "amjj") + with pytest.raises(ValueError): + season_cat_func(cube, "time", name="my_category", seasons=seasons) + + +def test_add_season_membership_invalid_spec(cube): + season = "maj" # not a season! + with pytest.raises(ValueError): + ccat.add_season_membership(cube, "time", season, name="maj_season")