Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Calendar conversion #426

Merged
merged 7 commits into from
Apr 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
History
=======

0.17.x
------
* Added `convert_calendar` and `interp_calendar` to help in the conversion between calendars.

0.16.0 (2020-04-23)
-------------------
* Added `vectorize` flag to `subset_shape` and `create_mask_vectorize` function based on `shapely.vectorize` as default backend for mask creation.
Expand Down
4 changes: 2 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pytest
import xarray as xr

from xclim.core.calendar import calendars
from xclim.core.calendar import max_doy


@pytest.fixture
Expand Down Expand Up @@ -151,7 +151,7 @@ def ndq_series():
@pytest.fixture
def per_doy():
def _per_doy(values, calendar="standard", units="kg m-2 s-1"):
n = calendars[calendar]
n = max_doy[calendar]
if len(values) != n:
raise ValueError(
"Values must be same length as number of days in calendar."
Expand Down
254 changes: 225 additions & 29 deletions tests/test_calendar.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,13 @@
from xarray.coding.cftimeindex import CFTimeIndex

from xclim.core.calendar import adjust_doy_calendar
from xclim.core.calendar import infer_doy_max
from xclim.core.calendar import convert_calendar
from xclim.core.calendar import datetime_to_decimal_year
from xclim.core.calendar import days_in_year
from xclim.core.calendar import ensure_cftime_array
from xclim.core.calendar import get_calendar
from xclim.core.calendar import interp_calendar
from xclim.core.calendar import max_doy
from xclim.core.calendar import percentile_doy
from xclim.core.calendar import time_bnds

Expand Down Expand Up @@ -40,6 +46,12 @@ def da(index):
)


def date_range(*args, calendar="default", **kwargs):
if calendar == "default":
return pd.date_range(*args, **kwargs)
return xr.cftime_range(*args, calendar=calendar, **kwargs)


@pytest.mark.parametrize(
"freq", ["3A-MAY", "5Q-JUN", "7M", "6480H", "302431T", "23144781S"]
)
Expand All @@ -62,44 +74,228 @@ def test_time_bnds(freq, datetime_index, cftime_index):
assert_array_equal(cftime_ends, datetime_ends)


def test_percentile_doye(tas_series):
def test_percentile_doy(tas_series):
tas = tas_series(np.arange(365), start="1/1/2001")
tas = xr.concat((tas, tas), "dim0")
p1 = percentile_doy(tas, window=5, per=0.5)
assert p1.sel(dayofyear=3, dim0=0).data == 2
assert p1.attrs["units"] == "K"


class TestAdjustDoyCalendar:
def test_360_to_366(self):
source = xr.DataArray(
np.arange(360), coords=[np.arange(1, 361)], dims="dayofyear"
)
time = pd.date_range("2000-01-01", "2001-12-31", freq="D")
target = xr.DataArray(np.arange(len(time)), coords=[time], dims="time")
def test_adjust_doy_360_to_366():
source = xr.DataArray(np.arange(360), coords=[np.arange(1, 361)], dims="dayofyear")
time = pd.date_range("2000-01-01", "2001-12-31", freq="D")
target = xr.DataArray(np.arange(len(time)), coords=[time], dims="time")

out = adjust_doy_calendar(source, target)
out = adjust_doy_calendar(source, target)

assert out.sel(dayofyear=1) == source.sel(dayofyear=1)
assert out.sel(dayofyear=366) == source.sel(dayofyear=360)
assert out.sel(dayofyear=1) == source.sel(dayofyear=1)
assert out.sel(dayofyear=366) == source.sel(dayofyear=360)

def test_infer_doy_max(self):
fn = os.path.join(
TESTS_DATA,
"CanESM2_365day",
"pr_day_CanESM2_rcp85_r1i1p1_na10kgrid_qm-moving-50bins-detrend_2095.nc",
)
with xr.open_dataset(fn) as ds:
assert infer_doy_max(ds) == 365

fn = os.path.join(
TESTS_DATA,
"HadGEM2-CC_360day",
"pr_day_HadGEM2-CC_rcp85_r1i1p1_na10kgrid_qm-moving-50bins-detrend_2095.nc",
@pytest.mark.parametrize(
"file,cal,maxdoy",
[
(
(
"CanESM2_365day",
"pr_day_CanESM2_rcp85_r1i1p1_na10kgrid_qm-moving-50bins-detrend_2095.nc",
),
"noleap",
365,
),
(
(
"HadGEM2-CC_360day",
"pr_day_HadGEM2-CC_rcp85_r1i1p1_na10kgrid_qm-moving-50bins-detrend_2095.nc",
),
"360_day",
360,
),
(("NRCANdaily", "nrcan_canada_daily_pr_1990.nc"), "default", 366),
],
)
def test_get_calendar(file, cal, maxdoy):
with xr.open_dataset(os.path.join(TESTS_DATA, *file)) as ds:
out_cal = get_calendar(ds)
assert cal == out_cal
assert max_doy[cal] == maxdoy


@pytest.mark.parametrize(
"source,target,target_as_str,freq",
[
("standard", "noleap", True, "D"),
("noleap", "default", True, "D"),
("noleap", "all_leap", False, "D"),
("proleptic_gregorian", "noleap", False, "4H"),
("default", "noleap", True, "4H"),
],
)
def test_convert_calendar(source, target, target_as_str, freq):
src = xr.DataArray(
date_range("2004-01-01", "2004-12-31", freq=freq, calendar=source),
dims=("time",),
name="time",
)
da_src = xr.DataArray(
np.linspace(0, 1, src.size), dims=("time",), coords={"time": src}
)
tgt = xr.DataArray(
date_range("2004-01-01", "2004-12-31", freq=freq, calendar=target),
dims=("time",),
name="time",
)

conv = convert_calendar(da_src, target if target_as_str else tgt)

assert get_calendar(conv) == target

if target_as_str and max_doy[source] < max_doy[target]:
assert conv.size == src.size
elif not target_as_str:
assert conv.size == tgt.size

assert conv.isnull().sum() == max(max_doy[target] - max_doy[source], 0)


@pytest.mark.parametrize(
"source,target,freq",
[
("standard", "360_day", "D"),
("360_day", "default", "D"),
("proleptic_gregorian", "360_day", "4H"),
],
)
@pytest.mark.parametrize("align_on", ["date", "year"])
def test_convert_calendar_360_days(source, target, freq, align_on):
src = xr.DataArray(
date_range("2004-01-01", "2004-12-30", freq=freq, calendar=source),
dims=("time",),
name="time",
)
da_src = xr.DataArray(
np.linspace(0, 1, src.size), dims=("time",), coords={"time": src}
)

conv = convert_calendar(da_src, target, align_on=align_on)

assert get_calendar(conv) == target

if align_on == "date":
np.testing.assert_array_equal(
conv.time.resample(time="M").last().dt.day,
[30, 29, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30],
)
elif target == "360_day":
np.testing.assert_array_equal(
conv.time.resample(time="M").last().dt.day,
[30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 29],
)
else:
np.testing.assert_array_equal(
conv.time.resample(time="M").last().dt.day,
[30, 29, 30, 30, 31, 30, 30, 31, 30, 31, 29, 31],
)
with xr.open_dataset(fn) as ds:
assert infer_doy_max(ds) == 360
if source == "360_day" and align_on == "year":
assert conv.size == 360 if freq == "D" else 360 * 4
else:
assert conv.size == 359 if freq == "D" else 359 * 4


@pytest.mark.parametrize(
"source,target",
[
("standard", "noleap"),
("noleap", "default"),
("standard", "360_day"),
("360_day", "gregorian"),
("noleap", "all_leap"),
("360_day", "noleap"),
],
)
def test_interp_calendar(source, target):
src = xr.DataArray(
date_range("2004-01-01", "2004-07-30", freq="D", calendar=source),
dims=("time",),
name="time",
)
tgt = xr.DataArray(
date_range("2004-01-01", "2004-07-30", freq="D", calendar=target),
dims=("time",),
name="time",
)
da_src = xr.DataArray(
np.linspace(0, 1, src.size), dims=("time",), coords={"time": src}
)
conv = interp_calendar(da_src, tgt)

assert conv.size == tgt.size
assert get_calendar(conv) == target

np.testing.assert_almost_equal(conv.max(), 1, 2)
assert conv.min() == 0

fn = os.path.join(TESTS_DATA, "NRCANdaily", "nrcan_canada_daily_pr_1990.nc")
with xr.open_dataset(fn) as ds:
assert infer_doy_max(ds) == 366

@pytest.mark.parametrize(
"inp,calout",
[
(
xr.DataArray(
date_range("2004-01-01", "2004-01-10", freq="D"),
dims=("time",),
name="time",
),
"gregorian",
),
(date_range("2004-01-01", "2004-01-10", freq="D"), "gregorian"),
(
xr.DataArray(date_range("2004-01-01", "2004-01-10", freq="D")).values,
"gregorian",
),
(date_range("2004-01-01", "2004-01-10", freq="D"), "gregorian"),
(date_range("2004-01-01", "2004-01-10", freq="D", calendar="julian"), "julian"),
],
)
def test_ensure_cftime_array(inp, calout):
out = ensure_cftime_array(inp)
assert out[0].calendar == calout


@pytest.mark.parametrize(
"year,calendar,exp",
[
(2004, "standard", 366),
(2004, "noleap", 365),
(2004, "all_leap", 366),
(1500, "default", 365),
(1500, "gregorian", 366),
(1500, "proleptic_gregorian", 365),
(2030, "360_day", 360),
],
)
def test_days_in_year(year, calendar, exp):
assert days_in_year(year, calendar) == exp


@pytest.mark.parametrize(
"source_cal, exp180",
[
("standard", 0.49180328),
("default", 0.49180328),
("noleap", 0.49315068),
("all_leap", 0.49180328),
("360_day", 0.5),
(None, 0.49180328),
],
)
def test_datetime_to_decimal_year(source_cal, exp180):
times = xr.DataArray(
date_range(
"2004-01-01", "2004-12-30", freq="D", calendar=source_cal or "default"
),
dims=("time",),
name="time",
)
decy = datetime_to_decimal_year(times, calendar=source_cal)
np.testing.assert_almost_equal(decy[180] - 2004, exp180)
Loading