From a2b4fc4592f2dd9c063159d0977f5ab26c51545b Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 10 Jan 2024 13:59:00 -0500 Subject: [PATCH 1/7] Convert 360 calendar randomly --- xarray/coding/calendar_ops.py | 51 ++++++++++++++++++++++++++----- xarray/tests/test_calendar_ops.py | 36 ++++++++++++++++++++++ 2 files changed, 79 insertions(+), 8 deletions(-) diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py index dc2f95b832e..6038bbf08b0 100644 --- a/xarray/coding/calendar_ops.py +++ b/xarray/coding/calendar_ops.py @@ -64,7 +64,7 @@ def convert_calendar( The target calendar name. dim : str Name of the time coordinate in the input DataArray or Dataset. - align_on : {None, 'date', 'year'} + align_on : {None, 'date', 'year', 'random'} Must be specified when either the source or target is a `"360_day"` calendar; ignored otherwise. See Notes. missing : any, optional @@ -143,6 +143,16 @@ def convert_calendar( will be dropped as there are no equivalent dates in a standard calendar. This option is best used with data on a frequency coarser than daily. + + "random" + Similar to "year", each day of year of the source is mapped to another day of year + of the target. However, instead of having always the same missing days according + the source and target years, here 5 days are chosen randomly, one for each fifth + of the year. However, February 29th is always missing when converting to a leap year, + or its value is dropped when converting from a leap year. This is similar to method + used in the LOCA dataset (see Pierce, Cayan, and Thrasher (2014). doi:10.1175/JHM-D-14-0082.1). + + This option is best used on daily data. """ from xarray.core.dataarray import DataArray @@ -174,14 +184,18 @@ def convert_calendar( out = obj.copy() - if align_on == "year": + if align_on in ["year", "random"]: # Special case for conversion involving 360_day calendar - # Instead of translating dates directly, this tries to keep the position within a year similar. - - new_doy = time.groupby(f"{dim}.year").map( - _interpolate_day_of_year, target_calendar=calendar, use_cftime=use_cftime - ) - + if align_on == "year": + # Instead of translating dates directly, this tries to keep the position within a year similar. + new_doy = time.groupby(f"{dim}.year").map( + _interpolate_day_of_year, target_calendar=calendar, use_cftime=use_cftime, + ) + elif align_on == "random": + # The 5 days to remove are randomly chosen, one for each of the five 72-days periods of the year. + new_doy = time.groupby(f"{dim}.year").map( + _random_day_of_year, target_calendar=calendar, use_cftime=use_cftime + ) # Convert the source datetimes, but override the day of year with our new day of years. out[dim] = DataArray( [ @@ -229,6 +243,27 @@ def _interpolate_day_of_year(time, target_calendar, use_cftime): ).astype(int) +def _random_day_of_year(time, target_calendar, use_cftime, rng=None): + """Return a day of year in the new calendar. + + Removes Feb 29th and five other days chosen randomly within five sections of 72 days. + """ + year = int(time.dt.year[0]) + source_calendar = time.dt.calendar + new_doy = np.arange(360) + 1 + rm_idx = (rng or np.random.default_rng()).integers(0, 72, 5) + (np.arange(5) * 72) + if source_calendar == "360_day": + for idx in rm_idx: + new_doy[idx + 1 :] = new_doy[idx + 1 :] + 1 + if _days_in_year(year, target_calendar, use_cftime) == 366: + new_doy[new_doy >= 60] = new_doy[new_doy >= 60] + 1 + elif target_calendar == "360_day": + new_doy = np.insert(new_doy, rm_idx - np.arange(5), -1) + if _days_in_year(year, source_calendar, use_cftime) == 366: + new_doy = np.insert(new_doy, 60, -1) + return new_doy[time.dt.dayofyear - 1] + + def _convert_to_new_calendar_with_new_day_of_year( date, day_of_year, calendar, use_cftime ): diff --git a/xarray/tests/test_calendar_ops.py b/xarray/tests/test_calendar_ops.py index ab0ee8d0f71..405a4753055 100644 --- a/xarray/tests/test_calendar_ops.py +++ b/xarray/tests/test_calendar_ops.py @@ -108,6 +108,42 @@ def test_convert_calendar_360_days(source, target, freq, align_on): assert conv.size == 359 if freq == "D" else 359 * 4 +def test_convert_calendar_360_days_random(): + da_std = DataArray( + np.linspace(0, 1, 366 * 2), + dims=("time",), + coords={ + "time": date_range( + "2004-01-01", "2004-12-31T23:59:59", freq="12H", calendar="standard", use_cftime=False + ) + }, + ) + da_360 = DataArray( + np.linspace(0, 1, 360 * 2), + dims=("time",), + coords={ + "time": date_range( + "2004-01-01", "2004-12-30T23:59:59", freq="12H", calendar="360_day" + ) + }, + ) + + conv = convert_calendar(da_std, "360_day", align_on="random") + conv2 = convert_calendar(da_std, "360_day", align_on="random") + assert (conv != conv2).any() + + conv = convert_calendar(da_360, "standard", use_cftime=False, align_on="random") + assert np.datetime64("2004-02-29") not in conv.time + conv2 = convert_calendar(da_360, "standard", use_cftime=False, align_on="random") + assert (conv2 != conv).any() + + # Ensure that added days are evenly distributed in the 5 fifths of each year + conv = convert_calendar(da_360, "noleap", align_on="random", missing=np.NaN) + conv = conv.where(conv.isnull(), drop=True) + nandoys = conv.time.dt.dayofyear[::2] + assert all(nandoys < np.array([74, 147, 220, 293, 366])) + assert all(nandoys > np.array([0, 73, 146, 219, 292])) + @requires_cftime @pytest.mark.parametrize( "source,target,freq", From 31324d8bf1ebe5d20597269c621878bb8456061b Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 10 Jan 2024 14:03:23 -0500 Subject: [PATCH 2/7] add note to whats new --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ba8856e178b..277b77d68e0 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -30,6 +30,8 @@ New Features See `netCDF4 documentation `_ for details. By `Markel García-Díez `_. (:issue:`6929`, :pull:`7551`) Note that some new compression filters needs plugins to be installed which may not be available in all netCDF distributions. +- New "random" method for converting to and from 360_day calendars. + By `Pascal Bourgault `_. Breaking changes ~~~~~~~~~~~~~~~~ From 2e7e3c5de4f8bd0836dda3a131c7f66162559b86 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 10 Jan 2024 14:14:42 -0500 Subject: [PATCH 3/7] add pull number to whats new entry --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 277b77d68e0..3d6eab8eda0 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -30,7 +30,7 @@ New Features See `netCDF4 documentation `_ for details. By `Markel García-Díez `_. (:issue:`6929`, :pull:`7551`) Note that some new compression filters needs plugins to be installed which may not be available in all netCDF distributions. -- New "random" method for converting to and from 360_day calendars. +- New "random" method for converting to and from 360_day calendars (:pull:`8603`). By `Pascal Bourgault `_. Breaking changes From e0a9c0516e6e13b08deab0caee988f620b19107d Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 10 Jan 2024 14:19:03 -0500 Subject: [PATCH 4/7] run pre-commit --- xarray/coding/calendar_ops.py | 6 ++++-- xarray/tests/test_calendar_ops.py | 7 ++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py index 6038bbf08b0..193110da90c 100644 --- a/xarray/coding/calendar_ops.py +++ b/xarray/coding/calendar_ops.py @@ -186,10 +186,12 @@ def convert_calendar( if align_on in ["year", "random"]: # Special case for conversion involving 360_day calendar - if align_on == "year": + if align_on == "year": # Instead of translating dates directly, this tries to keep the position within a year similar. new_doy = time.groupby(f"{dim}.year").map( - _interpolate_day_of_year, target_calendar=calendar, use_cftime=use_cftime, + _interpolate_day_of_year, + target_calendar=calendar, + use_cftime=use_cftime, ) elif align_on == "random": # The 5 days to remove are randomly chosen, one for each of the five 72-days periods of the year. diff --git a/xarray/tests/test_calendar_ops.py b/xarray/tests/test_calendar_ops.py index 405a4753055..31adc51f382 100644 --- a/xarray/tests/test_calendar_ops.py +++ b/xarray/tests/test_calendar_ops.py @@ -114,7 +114,11 @@ def test_convert_calendar_360_days_random(): dims=("time",), coords={ "time": date_range( - "2004-01-01", "2004-12-31T23:59:59", freq="12H", calendar="standard", use_cftime=False + "2004-01-01", + "2004-12-31T23:59:59", + freq="12H", + calendar="standard", + use_cftime=False, ) }, ) @@ -144,6 +148,7 @@ def test_convert_calendar_360_days_random(): assert all(nandoys < np.array([74, 147, 220, 293, 366])) assert all(nandoys > np.array([0, 73, 146, 219, 292])) + @requires_cftime @pytest.mark.parametrize( "source,target,freq", From 51d853b4a8773714671f2940cc4b2c780d9d4093 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 10 Jan 2024 15:47:35 -0500 Subject: [PATCH 5/7] Change test to use recommended freq --- xarray/tests/test_calendar_ops.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/xarray/tests/test_calendar_ops.py b/xarray/tests/test_calendar_ops.py index 31adc51f382..f99ff7b93be 100644 --- a/xarray/tests/test_calendar_ops.py +++ b/xarray/tests/test_calendar_ops.py @@ -110,25 +110,23 @@ def test_convert_calendar_360_days(source, target, freq, align_on): def test_convert_calendar_360_days_random(): da_std = DataArray( - np.linspace(0, 1, 366 * 2), + np.linspace(0, 1, 366), dims=("time",), coords={ "time": date_range( "2004-01-01", - "2004-12-31T23:59:59", - freq="12H", + "2004-12-31", + freq="D", calendar="standard", use_cftime=False, ) }, ) da_360 = DataArray( - np.linspace(0, 1, 360 * 2), + np.linspace(0, 1, 360), dims=("time",), coords={ - "time": date_range( - "2004-01-01", "2004-12-30T23:59:59", freq="12H", calendar="360_day" - ) + "time": date_range("2004-01-01", "2004-12-30", freq="D", calendar="360_day") }, ) @@ -144,7 +142,7 @@ def test_convert_calendar_360_days_random(): # Ensure that added days are evenly distributed in the 5 fifths of each year conv = convert_calendar(da_360, "noleap", align_on="random", missing=np.NaN) conv = conv.where(conv.isnull(), drop=True) - nandoys = conv.time.dt.dayofyear[::2] + nandoys = conv.time.dt.dayofyear[:366] assert all(nandoys < np.array([74, 147, 220, 293, 366])) assert all(nandoys > np.array([0, 73, 146, 219, 292])) From 8279d657718db4517362f173fe2267521289038f Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Mon, 15 Apr 2024 10:13:47 -0400 Subject: [PATCH 6/7] Apply suggestions from code review Co-authored-by: Spencer Clark --- xarray/coding/calendar_ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py index 193110da90c..5bbe2aa4f10 100644 --- a/xarray/coding/calendar_ops.py +++ b/xarray/coding/calendar_ops.py @@ -149,7 +149,7 @@ def convert_calendar( of the target. However, instead of having always the same missing days according the source and target years, here 5 days are chosen randomly, one for each fifth of the year. However, February 29th is always missing when converting to a leap year, - or its value is dropped when converting from a leap year. This is similar to method + or its value is dropped when converting from a leap year. This is similar to the method used in the LOCA dataset (see Pierce, Cayan, and Thrasher (2014). doi:10.1175/JHM-D-14-0082.1). This option is best used on daily data. @@ -253,7 +253,7 @@ def _random_day_of_year(time, target_calendar, use_cftime, rng=None): year = int(time.dt.year[0]) source_calendar = time.dt.calendar new_doy = np.arange(360) + 1 - rm_idx = (rng or np.random.default_rng()).integers(0, 72, 5) + (np.arange(5) * 72) + rm_idx = np.random.default_rng().integers(0, 72, 5) + 72 * np.arange(5) if source_calendar == "360_day": for idx in rm_idx: new_doy[idx + 1 :] = new_doy[idx + 1 :] + 1 From b531f0e3eea54fac4bd980ec2563edb804e95664 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Mon, 15 Apr 2024 10:26:13 -0400 Subject: [PATCH 7/7] Fix merge - remove rng arg --- doc/whats-new.rst | 2 -- xarray/coding/calendar_ops.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index bc86107954b..7411ed6168e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -98,8 +98,6 @@ New Features By `Llorenç Lledó `_. - Accept the compression arguments new in netCDF 1.6.0 in the netCDF4 backend. See `netCDF4 documentation `_ for details. - By `Markel García-Díez `_. (:issue:`6929`, :pull:`7551`) Note that some - new compression filters needs plugins to be installed which may not be available in all netCDF distributions. Note that some new compression filters needs plugins to be installed which may not be available in all netCDF distributions. By `Markel García-Díez `_. (:issue:`6929`, :pull:`7551`) - Add :py:meth:`DataArray.cumulative` & :py:meth:`Dataset.cumulative` to compute diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py index 5bbe2aa4f10..c4fe9e1f4ae 100644 --- a/xarray/coding/calendar_ops.py +++ b/xarray/coding/calendar_ops.py @@ -245,7 +245,7 @@ def _interpolate_day_of_year(time, target_calendar, use_cftime): ).astype(int) -def _random_day_of_year(time, target_calendar, use_cftime, rng=None): +def _random_day_of_year(time, target_calendar, use_cftime): """Return a day of year in the new calendar. Removes Feb 29th and five other days chosen randomly within five sections of 72 days.