From d5c22427744fc11ac8f1a26f5a84412cba87d72e Mon Sep 17 00:00:00 2001 From: David Huard Date: Mon, 16 Dec 2019 12:29:42 -0500 Subject: [PATCH 01/36] add support for CFTimeIndex in get_clean_interp_index --- xarray/core/missing.py | 108 ++++++++++++++++++++------------- xarray/tests/test_dataarray.py | 1 + xarray/tests/test_missing.py | 22 +++++++ 3 files changed, 89 insertions(+), 42 deletions(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 117fcaf8f81..b8ecda69563 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -9,7 +9,7 @@ from . import utils from .common import _contains_datetime_like_objects, ones_like from .computation import apply_ufunc -from .duck_array_ops import dask_array_type +from .duck_array_ops import dask_array_type, datetime_to_numeric from .utils import OrderedSet, is_scalar from .variable import Variable, broadcast_variables @@ -207,52 +207,76 @@ def _apply_over_vars_with_dim(func, self, dim=None, **kwargs): def get_clean_interp_index(arr, dim: Hashable, use_coordinate: Union[str, bool] = True): - """get index to use for x values in interpolation. + """Return index to use for x values in interpolation or curve fitting. - If use_coordinate is True, the coordinate that shares the name of the - dimension along which interpolation is being performed will be used as the - x values. + Parameters + ---------- + arr : DataArray + Array to interpolate or fit to a curve. + dim : str + Name of dimension along which to fit. + use_coordinate : str or bool + If use_coordinate is True, the coordinate that shares the name of the + dimension along which interpolation is being performed will be used as the + x values. If False, the x values are set as an equally spaced sequence. + + Returns + ------- + Variable + Numerical values for the x-coordinates. - If use_coordinate is False, the x values are set as an equally spaced - sequence. + Notes + ----- + If indexing is along the time dimension, datetime coordinates are converted + to time deltas with respect to 1970-01-01. """ - if use_coordinate: - if use_coordinate is True: - index = arr.get_index(dim) - else: - index = arr.coords[use_coordinate] - if index.ndim != 1: - raise ValueError( - f"Coordinates used for interpolation must be 1D, " - f"{use_coordinate} is {index.ndim}D." - ) - index = index.to_index() - - # TODO: index.name is None for multiindexes - # set name for nice error messages below - if isinstance(index, pd.MultiIndex): - index.name = dim - - if not index.is_monotonic: - raise ValueError(f"Index {index.name!r} must be monotonically increasing") - - if not index.is_unique: - raise ValueError(f"Index {index.name!r} has duplicate values") - - # raise if index cannot be cast to a float (e.g. MultiIndex) - try: - index = index.values.astype(np.float64) - except (TypeError, ValueError): - # pandas raises a TypeError - # xarray/numpy raise a ValueError - raise TypeError( - f"Index {index.name!r} must be castable to float64 to support " - f"interpolation, got {type(index).__name__}." - ) - else: + # Question: If use_coordinate is a string, what role does `dim` play? 
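# [Editorial sketch, not part of the patch] A minimal illustration of what this
# commit is after, using a made-up daily "noleap" series (requires cftime):
import numpy as np
import xarray as xr
from xarray.core.missing import get_clean_interp_index

times = xr.cftime_range("1970-01-01", periods=3, freq="1D", calendar="noleap")
da = xr.DataArray(np.arange(3), dims=("time",), coords={"time": times})
idx = get_clean_interp_index(da, dim="time")
# With this commit `idx` comes back as float days since 1970-01-01, i.e. [0., 1., 2.];
# later commits in the series switch the unit to nanoseconds.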
+ from xarray.coding.cftimeindex import CFTimeIndex + + if use_coordinate is False: axis = arr.get_axis_num(dim) - index = np.arange(arr.shape[axis], dtype=np.float64) + return np.arange(arr.shape[axis], dtype=np.float64) + + if use_coordinate is True: + index = arr.get_index(dim) + + else: # string + index = arr.coords[use_coordinate] + if index.ndim != 1: + raise ValueError( + f"Coordinates used for interpolation must be 1D, " + f"{use_coordinate} is {index.ndim}D." + ) + index = index.to_index() + + # TODO: index.name is None for multiindexes + # set name for nice error messages below + if isinstance(index, pd.MultiIndex): + index.name = dim + + if not index.is_monotonic: + raise ValueError(f"Index {index.name!r} must be monotonically increasing") + + if not index.is_unique: + raise ValueError(f"Index {index.name!r} has duplicate values") + + # Special case for non-standard calendar indexes + # Numerical datetime values are defined with respect to 1970-01-01T00:00:00 in units of days + if isinstance(index, CFTimeIndex): + offset = type(index[0])(1970, 1, 1) + index = Variable(data=datetime_to_numeric(index, offset=offset, datetime_unit="D"), dims=(dim,)) + + # raise if index cannot be cast to a float (e.g. MultiIndex) + try: + index = index.values.astype(np.float64) + except (TypeError, ValueError): + # pandas raises a TypeError + # xarray/numpy raise a ValueError + raise TypeError( + f"Index {index.name!r} must be castable to float64 to support " + f"interpolation, got {type(index).__name__}." + ) return index diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index f957316d8ac..1e1645dcdf1 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -4327,6 +4327,7 @@ def test_rolling_reduce(da, center, min_periods, window, name): assert actual.dims == expected.dims + @pytest.mark.parametrize("center", (True, False)) @pytest.mark.parametrize("min_periods", (None, 1, 2, 3)) @pytest.mark.parametrize("window", (1, 2, 3, 4)) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 1cd0319a9a5..c97ff5a4ce1 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -21,13 +21,22 @@ requires_dask, requires_scipy, ) +from xarray.tests.test_cftime_offsets import calendar +from . import requires_cftime @pytest.fixture def da(): return xr.DataArray([0, np.nan, 1, 2, np.nan, 3, 4, 5, np.nan, 6, 7], dims="time") +@pytest.fixture +def cf_da(calendar): + times = xr.cftime_range(start="1970-01-01", freq="1D", periods=3, calendar=calendar) + values = np.arange(3) + return xr.DataArray(values, dims=("time", ), coords={"time": times}) + + @pytest.fixture def ds(): ds = xr.Dataset() @@ -472,6 +481,19 @@ def test_interpolate_na_nan_block_lengths(y, lengths): assert_equal(actual, expected) +@requires_cftime +def test_get_clean_interp_index(cf_da): + """The index for CFTimeIndex is in units of days. This means that if two series using a 360 and 365 days + calendar each have a trend of .01C/year, the linear regression coefficients will be different because they + have different number of days. + + Another option would be to have an index in units of years, but this would likely create other difficulties. 
+ """ + from xarray.core.missing import get_clean_interp_index + i = get_clean_interp_index(cf_da, dim="time") + np.testing.assert_array_equal(i, [0, 1, 2]) + + @pytest.fixture def da_time(): return xr.DataArray( From 77bb24cb3e53931b81b5f335befcaf436146f4cb Mon Sep 17 00:00:00 2001 From: David Huard Date: Mon, 16 Dec 2019 13:19:26 -0500 Subject: [PATCH 02/36] black --- xarray/core/missing.py | 5 ++++- xarray/tests/test_missing.py | 13 ++++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index b8ecda69563..1bc9e48a9f4 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -265,7 +265,10 @@ def get_clean_interp_index(arr, dim: Hashable, use_coordinate: Union[str, bool] # Numerical datetime values are defined with respect to 1970-01-01T00:00:00 in units of days if isinstance(index, CFTimeIndex): offset = type(index[0])(1970, 1, 1) - index = Variable(data=datetime_to_numeric(index, offset=offset, datetime_unit="D"), dims=(dim,)) + index = Variable( + data=datetime_to_numeric(index, offset=offset, datetime_unit="D"), + dims=(dim,), + ) # raise if index cannot be cast to a float (e.g. MultiIndex) try: diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index c97ff5a4ce1..5084a324437 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -21,20 +21,26 @@ requires_dask, requires_scipy, ) -from xarray.tests.test_cftime_offsets import calendar +from xarray.tests.test_cftime_offsets import _CFTIME_CALENDARS from . import requires_cftime + @pytest.fixture def da(): return xr.DataArray([0, np.nan, 1, 2, np.nan, 3, 4, 5, np.nan, 6, 7], dims="time") +@pytest.fixture(params=_CFTIME_CALENDARS) +def calendar(request): + return request.param + + @pytest.fixture def cf_da(calendar): times = xr.cftime_range(start="1970-01-01", freq="1D", periods=3, calendar=calendar) values = np.arange(3) - return xr.DataArray(values, dims=("time", ), coords={"time": times}) + return xr.DataArray(values, dims=("time",), coords={"time": times}) @pytest.fixture @@ -487,9 +493,10 @@ def test_get_clean_interp_index(cf_da): calendar each have a trend of .01C/year, the linear regression coefficients will be different because they have different number of days. - Another option would be to have an index in units of years, but this would likely create other difficulties. + Another option would be to have an index in units of years, but this would likely create other difficulties. """ from xarray.core.missing import get_clean_interp_index + i = get_clean_interp_index(cf_da, dim="time") np.testing.assert_array_equal(i, [0, 1, 2]) From 03e77698c64477fee27b87ee566ddc0e385f6042 Mon Sep 17 00:00:00 2001 From: David Huard Date: Mon, 16 Dec 2019 13:51:36 -0500 Subject: [PATCH 03/36] added test comparing cftime index with standard index --- xarray/tests/test_missing.py | 42 ++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 5084a324437..de7b38d3b9d 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -20,10 +20,10 @@ requires_bottleneck, requires_dask, requires_scipy, + requires_cftime, ) from xarray.tests.test_cftime_offsets import _CFTIME_CALENDARS -from . 
import requires_cftime @pytest.fixture @@ -31,16 +31,16 @@ def da(): return xr.DataArray([0, np.nan, 1, 2, np.nan, 3, 4, 5, np.nan, 6, 7], dims="time") -@pytest.fixture(params=_CFTIME_CALENDARS) -def calendar(request): - return request.param - - @pytest.fixture -def cf_da(calendar): - times = xr.cftime_range(start="1970-01-01", freq="1D", periods=3, calendar=calendar) - values = np.arange(3) - return xr.DataArray(values, dims=("time",), coords={"time": times}) +def cf_da(): + def _cf_da(calendar, freq="1D"): + times = xr.cftime_range( + start="1970-01-01", freq=freq, periods=10, calendar=calendar + ) + values = np.arange(10) + return xr.DataArray(values, dims=("time",), coords={"time": times}) + + return _cf_da @pytest.fixture @@ -488,17 +488,31 @@ def test_interpolate_na_nan_block_lengths(y, lengths): @requires_cftime -def test_get_clean_interp_index(cf_da): +@pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) +def test_get_clean_interp_index_calendar(cf_da, calendar): """The index for CFTimeIndex is in units of days. This means that if two series using a 360 and 365 days calendar each have a trend of .01C/year, the linear regression coefficients will be different because they have different number of days. Another option would be to have an index in units of years, but this would likely create other difficulties. """ - from xarray.core.missing import get_clean_interp_index + i = get_clean_interp_index(cf_da(calendar), dim="time") + np.testing.assert_array_equal(i, range(10)) - i = get_clean_interp_index(cf_da, dim="time") - np.testing.assert_array_equal(i, [0, 1, 2]) + +@requires_cftime +@pytest.mark.parametrize( + ("calendar", "freq"), + zip(["julian", "gregorian", "proleptic_gregorian"], ["1D", "1M", "1Y"]), +) +def test_get_clean_interp_index_dt(cf_da, calendar, freq): + """In the gregorian case, the index should be proportional to normal datetimes.""" + g = cf_da(calendar, freq=freq) + g["stime"] = xr.Variable(data=g.time.to_index().to_datetimeindex(), dims=("time",)) + + gi = get_clean_interp_index(g, "time") * 1e9 * 86400 + si = get_clean_interp_index(g, "time", use_coordinate="stime") + np.testing.assert_array_equal(gi, si) @pytest.fixture From e169cf484b8d6f048ea056731b2c601fed15a309 Mon Sep 17 00:00:00 2001 From: David Huard Date: Mon, 16 Dec 2019 13:53:37 -0500 Subject: [PATCH 04/36] added comment --- xarray/tests/test_missing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index de7b38d3b9d..6b8006f934c 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -510,7 +510,7 @@ def test_get_clean_interp_index_dt(cf_da, calendar, freq): g = cf_da(calendar, freq=freq) g["stime"] = xr.Variable(data=g.time.to_index().to_datetimeindex(), dims=("time",)) - gi = get_clean_interp_index(g, "time") * 1e9 * 86400 + gi = get_clean_interp_index(g, "time") * 1e9 * 86400 # Convert days to nanoseconds si = get_clean_interp_index(g, "time", use_coordinate="stime") np.testing.assert_array_equal(gi, si) From 303020f03a95bed3c1eb107a95de64a0cc095826 Mon Sep 17 00:00:00 2001 From: David Huard Date: Mon, 16 Dec 2019 14:12:57 -0500 Subject: [PATCH 05/36] index in ns instead of days --- xarray/core/missing.py | 4 ++-- xarray/tests/test_missing.py | 17 +++++++++++++---- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 1bc9e48a9f4..fde8f5e5b51 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -262,11 +262,11 @@ 
def get_clean_interp_index(arr, dim: Hashable, use_coordinate: Union[str, bool] raise ValueError(f"Index {index.name!r} has duplicate values") # Special case for non-standard calendar indexes - # Numerical datetime values are defined with respect to 1970-01-01T00:00:00 in units of days + # Numerical datetime values are defined with respect to 1970-01-01T00:00:00 in units of nanoseconds if isinstance(index, CFTimeIndex): offset = type(index[0])(1970, 1, 1) index = Variable( - data=datetime_to_numeric(index, offset=offset, datetime_unit="D"), + data=datetime_to_numeric(index, offset=offset, datetime_unit="ns"), dims=(dim,), ) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 6b8006f934c..9b7f8d3e686 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -497,24 +497,33 @@ def test_get_clean_interp_index_calendar(cf_da, calendar): Another option would be to have an index in units of years, but this would likely create other difficulties. """ i = get_clean_interp_index(cf_da(calendar), dim="time") - np.testing.assert_array_equal(i, range(10)) + np.testing.assert_array_equal(i, np.arange(10) * 1e9 * 86400) @requires_cftime @pytest.mark.parametrize( - ("calendar", "freq"), - zip(["julian", "gregorian", "proleptic_gregorian"], ["1D", "1M", "1Y"]), + ("calendar", "freq"), zip(["gregorian", "proleptic_gregorian"], ["1D", "1M", "1Y"]) ) def test_get_clean_interp_index_dt(cf_da, calendar, freq): """In the gregorian case, the index should be proportional to normal datetimes.""" g = cf_da(calendar, freq=freq) g["stime"] = xr.Variable(data=g.time.to_index().to_datetimeindex(), dims=("time",)) - gi = get_clean_interp_index(g, "time") * 1e9 * 86400 # Convert days to nanoseconds + gi = get_clean_interp_index(g, "time") si = get_clean_interp_index(g, "time", use_coordinate="stime") np.testing.assert_array_equal(gi, si) +@pytest.mark.xfail +def test_get_clean_interp_index_overflow(): + da = xr.DataArray( + [0, 1, 2], + dims=("time",), + coords={"time": xr.cftime_range("0000-01-01", periods=3, calendar="360_day")}, + ) + get_clean_interp_index(da, "time") + + @pytest.fixture def da_time(): return xr.DataArray( From 210fb94ca24ce7fc40bcd7f925cdc19d38b19978 Mon Sep 17 00:00:00 2001 From: David Huard Date: Mon, 16 Dec 2019 14:57:06 -0500 Subject: [PATCH 06/36] pep8 --- xarray/tests/test_dataarray.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 1e1645dcdf1..f957316d8ac 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -4327,7 +4327,6 @@ def test_rolling_reduce(da, center, min_periods, window, name): assert actual.dims == expected.dims - @pytest.mark.parametrize("center", (True, False)) @pytest.mark.parametrize("min_periods", (None, 1, 2, 3)) @pytest.mark.parametrize("window", (1, 2, 3, 4)) From 1cfe72d694b790072e3845462433b23ddf4d67d0 Mon Sep 17 00:00:00 2001 From: David Huard Date: Wed, 18 Dec 2019 12:28:55 -0500 Subject: [PATCH 07/36] datetime_to_numeric: convert timedelta objects using np.timedelta64 type conversion. 
add overflow tests --- xarray/core/duck_array_ops.py | 43 +++++++++++++++++++++-------- xarray/tests/test_duck_array_ops.py | 22 ++++++++++++++- 2 files changed, 52 insertions(+), 13 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index cf616acb485..7d84bc3cc03 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -372,43 +372,62 @@ def _datetime_nanmin(array): def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float): - """Convert an array containing datetime-like data to an array of floats. + """Return an array containing datetime-like data to numerical values. + + Convert the datetime array to a timedelta relative to an offset. Parameters ---------- - da : np.array - Input data - offset: Scalar with the same type of array or None - If None, subtract minimum values to reduce round off error - datetime_unit: None or any of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', - 'us', 'ns', 'ps', 'fs', 'as'} - dtype: target dtype + da : array-like + Input data + offset: None, datetime or cftime.datetime + Datetime offset. If None, this is set by default to the array's minimum + value to reduce round off errors. + datetime_unit: {None, Y, M, W, D, h, m, s, ms, us, ns, ps, fs, as} + If not None, convert output to a given datetime unit. Note that some + conversions are not allowed due to non-linear relationships between units. + dtype: dtype + Output dtype. Returns ------- array + Numerical representation of datetime object relative to an offset. + + Notes + ----- + Some datetime unit conversions won't work, for example from days to years, even + though some calendars would allow for them (e.g. no_leap). This is because there + is no `cftime.timedelta` object. """ # TODO: make this function dask-compatible? + # Set offset to minimum if not given if offset is None: if array.dtype.kind in "Mm": offset = _datetime_nanmin(array) else: offset = min(array) + + # Compute timedelta object. + # For np.datetime64, this can silently yield garbage due to overflow. array = array - offset - if not hasattr(array, "dtype"): # scalar is converted to 0d-array + # Scalar is converted to 0d-array + if not hasattr(array, "dtype"): array = np.array(array) + # Convert timedelta objects to timedelta64[ms] dtype. if array.dtype.kind in "O": - # possibly convert object array containing datetime.timedelta - array = np.asarray(pd.Series(array.ravel())).reshape(array.shape) + array = array.astype("timedelta64[ms]") + # Convert to specified timedelta units. if datetime_unit: array = array / np.timedelta64(1, datetime_unit) - # convert np.NaT to np.nan + # Convert np.NaT to np.nan if array.dtype.kind in "mM": return np.where(isnull(array), np.nan, array.astype(dtype)) + return array.astype(dtype) diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index aee7bbd6b11..3e8f533af7b 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -669,10 +669,22 @@ def test_datetime_to_numeric_datetime64(): expected = 24 * np.arange(0, 35, 7).astype(dtype) np.testing.assert_array_equal(result, expected) + # Test overflow. Need to convert to ms first otherwise we get garbage + # TODO: make datetime_to_numeric more robust to overflow errors. 
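    # [Editorial aside, not part of the patch] The "garbage" referred to above is
    # silent int64 overflow: `times` has nanosecond resolution, numpy carries out
    # the subtraction at the finer of the two units, and +/- 2**63 nanoseconds only
    # spans about 292 years, far less than the ~2000 years back to 0001-01-01.
    # Casting to a coarser unit such as datetime64[ms] first keeps the difference
    # in range, e.g.
    #     np.datetime64("2000-01-01") - np.datetime64("0001-01-01")  # 730119 days, no overflow
    #     2**63 / (1e9 * 86400 * 365.25)                             # ~292 years representable in ns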
+ offset = np.datetime64("0001-01-01") + ms = times.astype("datetime64[ms]") + result = duck_array_ops.datetime_to_numeric( + ms, offset=offset, datetime_unit="D", dtype=int + ) + expected = 730119 + np.arange(0, 35, 7) + np.testing.assert_array_equal(result, expected) + @requires_cftime def test_datetime_to_numeric_cftime(): - times = cftime_range("2000", periods=5, freq="7D").values + import cftime + + times = cftime_range("2000", periods=5, freq="7D", calendar="standard").values result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h") expected = 24 * np.arange(0, 35, 7) np.testing.assert_array_equal(result, expected) @@ -686,3 +698,11 @@ def test_datetime_to_numeric_cftime(): result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h", dtype=dtype) expected = 24 * np.arange(0, 35, 7).astype(dtype) np.testing.assert_array_equal(result, expected) + + # Test potential overflow (outputs are different from timedelta64. Expected ?) + offset = cftime.DatetimeGregorian(1, 1, 1) + result = duck_array_ops.datetime_to_numeric( + times, offset=offset, datetime_unit="D", dtype=int + ) + expected = 730121 + np.arange(0, 35, 7) + np.testing.assert_array_equal(result, expected) From 49641632ac4c13f53ff5499d0bc583690ad70f4d Mon Sep 17 00:00:00 2001 From: David Huard Date: Wed, 18 Dec 2019 12:33:00 -0500 Subject: [PATCH 08/36] added interp test --- xarray/tests/test_interp.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py index b93325d7eab..e3af8b5873a 100644 --- a/xarray/tests/test_interp.py +++ b/xarray/tests/test_interp.py @@ -662,3 +662,10 @@ def test_datetime_interp_noerror(): coords={"time": pd.date_range("01-01-2001", periods=50, freq="H")}, ) a.interp(x=xi, time=xi.time) # should not raise an error + + +@requires_cftime +def test_3641(): + times = xr.cftime_range("0001", periods=3, freq="500Y") + da = xr.DataArray(range(3), dims=["time"], coords=[times]) + da.interp(time=["0002-05-01"]) From 83f6c89b958612a8ac7766a0c67d69a58b5eb196 Mon Sep 17 00:00:00 2001 From: David Huard Date: Wed, 18 Dec 2019 16:36:43 -0500 Subject: [PATCH 09/36] switched clean_interp_index resolution to us. Fixed interpolate_na and added support for CFTimeIndex. --- xarray/core/missing.py | 10 ++++++---- xarray/tests/test_missing.py | 9 +++------ 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index fde8f5e5b51..f3110429c1a 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -263,10 +263,10 @@ def get_clean_interp_index(arr, dim: Hashable, use_coordinate: Union[str, bool] # Special case for non-standard calendar indexes # Numerical datetime values are defined with respect to 1970-01-01T00:00:00 in units of nanoseconds - if isinstance(index, CFTimeIndex): + if isinstance(index, (CFTimeIndex, pd.DatetimeIndex)): offset = type(index[0])(1970, 1, 1) index = Variable( - data=datetime_to_numeric(index, offset=offset, datetime_unit="ns"), + data=datetime_to_numeric(index, offset=offset, datetime_unit="us"), dims=(dim,), ) @@ -295,6 +295,8 @@ def interp_na( ): """Interpolate values according to different methods. 
""" + from xarray.coding.cftimeindex import CFTimeIndex + if dim is None: raise NotImplementedError("dim is a required argument") @@ -308,7 +310,7 @@ def interp_na( if ( dim in self.indexes - and isinstance(self.indexes[dim], pd.DatetimeIndex) + and isinstance(self.indexes[dim], (pd.DatetimeIndex, CFTimeIndex)) and use_coordinate ): if not isinstance(max_gap, (np.timedelta64, pd.Timedelta, str)): @@ -327,7 +329,7 @@ def interp_na( if isinstance(max_gap, pd.Timedelta): max_gap = np.timedelta64(max_gap.value, "ns") - max_gap = np.timedelta64(max_gap, "ns").astype(np.float64) + max_gap = np.timedelta64(max_gap, "us").astype(np.float64) if not use_coordinate: if not isinstance(max_gap, (Number, np.number)): diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 9b7f8d3e686..3656061c4d7 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -489,7 +489,7 @@ def test_interpolate_na_nan_block_lengths(y, lengths): @requires_cftime @pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) -def test_get_clean_interp_index_calendar(cf_da, calendar): +def test_get_clean_interp_index_cf_calendar(cf_da, calendar): """The index for CFTimeIndex is in units of days. This means that if two series using a 360 and 365 days calendar each have a trend of .01C/year, the linear regression coefficients will be different because they have different number of days. @@ -497,7 +497,7 @@ def test_get_clean_interp_index_calendar(cf_da, calendar): Another option would be to have an index in units of years, but this would likely create other difficulties. """ i = get_clean_interp_index(cf_da(calendar), dim="time") - np.testing.assert_array_equal(i, np.arange(10) * 1e9 * 86400) + np.testing.assert_array_equal(i, np.arange(10) * 1e6 * 86400) @requires_cftime @@ -553,10 +553,7 @@ def test_interpolate_na_max_gap_errors(da_time): @requires_bottleneck -@pytest.mark.parametrize( - "time_range_func", - [pd.date_range, pytest.param(xr.cftime_range, marks=pytest.mark.xfail)], -) +@pytest.mark.parametrize("time_range_func", [pd.date_range, xr.cftime_range]) @pytest.mark.parametrize("transform", [lambda x: x, lambda x: x.to_dataset(name="a")]) @pytest.mark.parametrize( "max_gap", ["3H", np.timedelta64(3, "h"), pd.to_timedelta("3H")] From 629895382da66066b6ece44f0c97f2c047d6b875 Mon Sep 17 00:00:00 2001 From: David Huard Date: Wed, 18 Dec 2019 17:19:07 -0500 Subject: [PATCH 10/36] Error message to explain overflow problem. --- xarray/coding/cftimeindex.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 4005d4fbf6d..8b440812ca9 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -430,7 +430,14 @@ def __sub__(self, other): import cftime if isinstance(other, (CFTimeIndex, cftime.datetime)): - return pd.TimedeltaIndex(np.array(self) - np.array(other)) + try: + return pd.TimedeltaIndex(np.array(self) - np.array(other)) + except OverflowError: + raise ValueError( + "The time difference exceeds the range of values " + "that can be expressed at the nanosecond resolution." 
+ ) + elif isinstance(other, pd.TimedeltaIndex): return CFTimeIndex(np.array(self) - other.to_pytimedelta()) else: From 2ba180353ef00e1c078b3d40619e7793194e50dd Mon Sep 17 00:00:00 2001 From: David Huard Date: Fri, 20 Dec 2019 08:40:28 -0500 Subject: [PATCH 11/36] switched timedelta64 units from ms to us --- xarray/core/duck_array_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 7d84bc3cc03..e5a5eaf2557 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -418,7 +418,7 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float): # Convert timedelta objects to timedelta64[ms] dtype. if array.dtype.kind in "O": - array = array.astype("timedelta64[ms]") + array = array.astype("timedelta64[us]") # Convert to specified timedelta units. if datetime_unit: From e873da253593b51b4f146c801266dc3b3591210c Mon Sep 17 00:00:00 2001 From: David Huard Date: Mon, 6 Jan 2020 13:00:16 -0500 Subject: [PATCH 12/36] reverted default user-visible resolution to ns. Converts to float, possibly lossy. --- xarray/core/duck_array_ops.py | 30 +++++++++++++++++++++-------- xarray/core/missing.py | 7 ++++--- xarray/tests/test_duck_array_ops.py | 8 ++++++++ xarray/tests/test_missing.py | 2 +- 4 files changed, 35 insertions(+), 12 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index e5a5eaf2557..585cd725c4f 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -410,6 +410,7 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float): # Compute timedelta object. # For np.datetime64, this can silently yield garbage due to overflow. + # One option is to enforce 1970-01-01 as the universal offset. array = array - offset # Scalar is converted to 0d-array @@ -418,17 +419,15 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float): # Convert timedelta objects to timedelta64[ms] dtype. if array.dtype.kind in "O": - array = array.astype("timedelta64[us]") - - # Convert to specified timedelta units. - if datetime_unit: - array = array / np.timedelta64(1, datetime_unit) + return py_timedelta_to_float(array, datetime_unit or "ns").astype(dtype) # Convert np.NaT to np.nan - if array.dtype.kind in "mM": - return np.where(isnull(array), np.nan, array.astype(dtype)) + elif array.dtype.kind in "mM": - return array.astype(dtype) + # Convert to specified timedelta units. + if datetime_unit: + array = array / np.timedelta64(1, datetime_unit) + return np.where(isnull(array), np.nan, array.astype(dtype)) def _to_pytimedelta(array, unit="us"): @@ -436,6 +435,21 @@ def _to_pytimedelta(array, unit="us"): return index.to_pytimedelta().reshape(array.shape) +def py_timedelta_to_float(array, datetime_unit="ns"): + """Convert a timedelta object to a float, possibly at a loss of resolution. + + Notes + ----- + This function converts a timedelta object to a float using microsecond resolution + timedelta64 dtype, which covers a fairly wide span of years at a resolution that + is reasonable for most applications. 
+ + """ + array = np.asarray(array).astype("timedelta64[us]").astype(np.float64) + conversion_factor = np.timedelta64(1, "us") / np.timedelta64(1, datetime_unit) + return conversion_factor * array + + def mean(array, axis=None, skipna=None, **kwargs): """inhouse mean that can handle np.datetime64 or cftime.datetime dtypes""" diff --git a/xarray/core/missing.py b/xarray/core/missing.py index f3110429c1a..749f1ddea49 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -9,7 +9,7 @@ from . import utils from .common import _contains_datetime_like_objects, ones_like from .computation import apply_ufunc -from .duck_array_ops import dask_array_type, datetime_to_numeric +from .duck_array_ops import dask_array_type, datetime_to_numeric, py_timedelta_to_float from .utils import OrderedSet, is_scalar from .variable import Variable, broadcast_variables @@ -266,7 +266,7 @@ def get_clean_interp_index(arr, dim: Hashable, use_coordinate: Union[str, bool] if isinstance(index, (CFTimeIndex, pd.DatetimeIndex)): offset = type(index[0])(1970, 1, 1) index = Variable( - data=datetime_to_numeric(index, offset=offset, datetime_unit="us"), + data=datetime_to_numeric(index, offset=offset, datetime_unit="ns"), dims=(dim,), ) @@ -329,7 +329,8 @@ def interp_na( if isinstance(max_gap, pd.Timedelta): max_gap = np.timedelta64(max_gap.value, "ns") - max_gap = np.timedelta64(max_gap, "us").astype(np.float64) + # Convert to float + max_gap = py_timedelta_to_float(max_gap) if not use_coordinate: if not isinstance(max_gap, (Number, np.number)): diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index 3e8f533af7b..b367f440d78 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -1,6 +1,7 @@ import warnings from textwrap import dedent +import datetime as dt import numpy as np import pandas as pd import pytest @@ -19,6 +20,7 @@ rolling_window, stack, where, + py_timedelta_to_float, ) from xarray.core.pycompat import dask_array_type from xarray.testing import assert_allclose, assert_equal @@ -706,3 +708,9 @@ def test_datetime_to_numeric_cftime(): ) expected = 730121 + np.arange(0, 35, 7) np.testing.assert_array_equal(result, expected) + + +@pytest.mark.parametrize("delta", [dt.timedelta(days=1), ]) +def test_py_datetime_to_float(delta): + assert py_timedelta_to_float(delta) == 86400 * 1e9 + diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 3656061c4d7..1a29976d918 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -497,7 +497,7 @@ def test_get_clean_interp_index_cf_calendar(cf_da, calendar): Another option would be to have an index in units of years, but this would likely create other difficulties. 
""" i = get_clean_interp_index(cf_da(calendar), dim="time") - np.testing.assert_array_equal(i, np.arange(10) * 1e6 * 86400) + np.testing.assert_array_equal(i, np.arange(10) * 1e9 * 86400) @requires_cftime From 532756dcc898c16bb0d8d11c5c4996e5e052854c Mon Sep 17 00:00:00 2001 From: David Huard Date: Mon, 6 Jan 2020 13:01:38 -0500 Subject: [PATCH 13/36] pep8 --- xarray/tests/test_duck_array_ops.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index b367f440d78..ae3d6ebc0db 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -713,4 +713,3 @@ def test_datetime_to_numeric_cftime(): @pytest.mark.parametrize("delta", [dt.timedelta(days=1), ]) def test_py_datetime_to_float(delta): assert py_timedelta_to_float(delta) == 86400 * 1e9 - From 73d87295664a61ffb220c57fbca037d06e17fe4e Mon Sep 17 00:00:00 2001 From: David Huard Date: Mon, 6 Jan 2020 13:06:58 -0500 Subject: [PATCH 14/36] black --- xarray/tests/test_duck_array_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index ae3d6ebc0db..dcbed357876 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -710,6 +710,6 @@ def test_datetime_to_numeric_cftime(): np.testing.assert_array_equal(result, expected) -@pytest.mark.parametrize("delta", [dt.timedelta(days=1), ]) +@pytest.mark.parametrize("delta", [dt.timedelta(days=1),]) def test_py_datetime_to_float(delta): assert py_timedelta_to_float(delta) == 86400 * 1e9 From 42887804f203b7274771e1d949c270397011c576 Mon Sep 17 00:00:00 2001 From: David Huard Date: Mon, 6 Jan 2020 15:25:24 -0500 Subject: [PATCH 15/36] special case for older numpy versions --- xarray/core/duck_array_ops.py | 25 +++++++++++---- xarray/tests/test_duck_array_ops.py | 49 +++++++++++++++++------------ 2 files changed, 48 insertions(+), 26 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 585cd725c4f..87ea6ec0c06 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -417,7 +417,7 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float): if not hasattr(array, "dtype"): array = np.array(array) - # Convert timedelta objects to timedelta64[ms] dtype. + # Convert timedelta objects to float by first converting to microseconds. if array.dtype.kind in "O": return py_timedelta_to_float(array, datetime_unit or "ns").astype(dtype) @@ -440,13 +440,26 @@ def py_timedelta_to_float(array, datetime_unit="ns"): Notes ----- - This function converts a timedelta object to a float using microsecond resolution - timedelta64 dtype, which covers a fairly wide span of years at a resolution that - is reasonable for most applications. + With Numpy >= 1.7, it's possible to convert directly from `datetime.timedelta` + to `numpy.timedelta64` at the microsecond (us) resolution. This covers a fairly + large span of years. + With earlier Numpy versions, the conversion only works at the nanosecond resolution, + which restricts the span that can be covered. 
""" - array = np.asarray(array).astype("timedelta64[us]").astype(np.float64) - conversion_factor = np.timedelta64(1, "us") / np.timedelta64(1, datetime_unit) + if np.__version__ < '1.17': + array = np.asarray(array) + array = np.asarray(pd.Series(array.ravel())).reshape(array.shape) + if array.dtype.kind in "O": + raise OverflowError("Could not convert timedelta to float.") + else: + array = array.astype(np.float64) # [ns] + res = "ns" + else: + array = np.asarray(array).astype("timedelta64[us]").astype(np.float64) # [us] + res = "us" + + conversion_factor = np.timedelta64(1, res) / np.timedelta64(1, datetime_unit) return conversion_factor * array diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index dcbed357876..a41c3b600e3 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -671,28 +671,16 @@ def test_datetime_to_numeric_datetime64(): expected = 24 * np.arange(0, 35, 7).astype(dtype) np.testing.assert_array_equal(result, expected) - # Test overflow. Need to convert to ms first otherwise we get garbage - # TODO: make datetime_to_numeric more robust to overflow errors. - offset = np.datetime64("0001-01-01") - ms = times.astype("datetime64[ms]") - result = duck_array_ops.datetime_to_numeric( - ms, offset=offset, datetime_unit="D", dtype=int - ) - expected = 730119 + np.arange(0, 35, 7) - np.testing.assert_array_equal(result, expected) - @requires_cftime def test_datetime_to_numeric_cftime(): - import cftime - times = cftime_range("2000", periods=5, freq="7D", calendar="standard").values - result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h") + result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h", dtype=int) expected = 24 * np.arange(0, 35, 7) np.testing.assert_array_equal(result, expected) offset = times[1] - result = duck_array_ops.datetime_to_numeric(times, offset=offset, datetime_unit="h") + result = duck_array_ops.datetime_to_numeric(times, offset=offset, datetime_unit="h", dtype=int) expected = 24 * np.arange(-7, 28, 7) np.testing.assert_array_equal(result, expected) @@ -701,15 +689,36 @@ def test_datetime_to_numeric_cftime(): expected = 24 * np.arange(0, 35, 7).astype(dtype) np.testing.assert_array_equal(result, expected) - # Test potential overflow (outputs are different from timedelta64. Expected ?) - offset = cftime.DatetimeGregorian(1, 1, 1) + +@requires_cftime +@pytest.mark.xfail(np.__version__ < "1.17", reason="Limited Numpy support for timedelta conversion.") +def test_datetime_to_numeric_overflow(): + import cftime + times = pd.date_range("2000", periods=5, freq="7D").values.astype("datetime64[us]") + cftimes = cftime_range("2000", periods=5, freq="7D", calendar="standard").values + + offset = np.datetime64("0001-01-01") + cfoffset = cftime.DatetimeGregorian(1, 1, 1) + result = duck_array_ops.datetime_to_numeric( times, offset=offset, datetime_unit="D", dtype=int ) - expected = 730121 + np.arange(0, 35, 7) + cfresult = duck_array_ops.datetime_to_numeric( + cftimes, offset=cfoffset, datetime_unit="D", dtype=int + ) + + expected = 730119 + np.arange(0, 35, 7) + + # Outputs are different. Expected ? 
np.testing.assert_array_equal(result, expected) + np.testing.assert_array_equal(cfresult, expected + 2) -@pytest.mark.parametrize("delta", [dt.timedelta(days=1),]) -def test_py_datetime_to_float(delta): - assert py_timedelta_to_float(delta) == 86400 * 1e9 +def test_py_datetime_to_float(): + assert py_timedelta_to_float(dt.timedelta(days=1)) == 86400 * 1e9 + + if np.__version__ < "1.17": + with pytest.raises(OverflowError): + py_timedelta_to_float(dt.timedelta(days=1e6)) + else: + assert py_timedelta_to_float(dt.timedelta(days=1e6)) == 86400 * 1e15 From 077145e07d19b41e0a63b9b7ee2c620dfc6d0daa Mon Sep 17 00:00:00 2001 From: David Huard Date: Mon, 6 Jan 2020 15:25:42 -0500 Subject: [PATCH 16/36] black --- xarray/core/duck_array_ops.py | 2 +- xarray/tests/test_duck_array_ops.py | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 87ea6ec0c06..860372dec61 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -447,7 +447,7 @@ def py_timedelta_to_float(array, datetime_unit="ns"): With earlier Numpy versions, the conversion only works at the nanosecond resolution, which restricts the span that can be covered. """ - if np.__version__ < '1.17': + if np.__version__ < "1.17": array = np.asarray(array) array = np.asarray(pd.Series(array.ravel())).reshape(array.shape) if array.dtype.kind in "O": diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index a41c3b600e3..adb4897a868 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -680,7 +680,9 @@ def test_datetime_to_numeric_cftime(): np.testing.assert_array_equal(result, expected) offset = times[1] - result = duck_array_ops.datetime_to_numeric(times, offset=offset, datetime_unit="h", dtype=int) + result = duck_array_ops.datetime_to_numeric( + times, offset=offset, datetime_unit="h", dtype=int + ) expected = 24 * np.arange(-7, 28, 7) np.testing.assert_array_equal(result, expected) @@ -691,9 +693,12 @@ def test_datetime_to_numeric_cftime(): @requires_cftime -@pytest.mark.xfail(np.__version__ < "1.17", reason="Limited Numpy support for timedelta conversion.") +@pytest.mark.xfail( + np.__version__ < "1.17", reason="Limited Numpy support for timedelta conversion." 
+) def test_datetime_to_numeric_overflow(): import cftime + times = pd.date_range("2000", periods=5, freq="7D").values.astype("datetime64[us]") cftimes = cftime_range("2000", periods=5, freq="7D", calendar="standard").values From 758d81cc92afede93ffab2e7a4a519229c246475 Mon Sep 17 00:00:00 2001 From: David Huard Date: Mon, 6 Jan 2020 17:01:03 -0500 Subject: [PATCH 17/36] added xfail for overflow error with numpy < 1.17 --- xarray/tests/test_interp.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py index e3af8b5873a..d59f88c6f48 100644 --- a/xarray/tests/test_interp.py +++ b/xarray/tests/test_interp.py @@ -664,6 +664,10 @@ def test_datetime_interp_noerror(): a.interp(x=xi, time=xi.time) # should not raise an error +@pytest.mark.xfail( + np.__version__ < "1.17", + reason="Numpy support for conversion from timedelta objects to " "timedelta64.", +) @requires_cftime def test_3641(): times = xr.cftime_range("0001", periods=3, freq="500Y") From d0d8bfeb6ecfe48591ef15ecbc489f0407477bb9 Mon Sep 17 00:00:00 2001 From: David Huard Date: Tue, 14 Jan 2020 09:32:10 -0500 Subject: [PATCH 18/36] changes following PR comments from spencerclark --- xarray/core/duck_array_ops.py | 53 ++++++++++++++++++++++++++--- xarray/core/missing.py | 25 ++++---------- xarray/tests/test_duck_array_ops.py | 20 +++++++---- xarray/tests/test_interp.py | 2 +- xarray/tests/test_missing.py | 3 +- 5 files changed, 70 insertions(+), 33 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 860372dec61..bd187e0652f 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -7,6 +7,7 @@ import inspect import warnings from functools import partial +from distutils.version import LooseVersion import numpy as np import pandas as pd @@ -372,7 +373,7 @@ def _datetime_nanmin(array): def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float): - """Return an array containing datetime-like data to numerical values. + """Convert an array containing datetime-like data to numerical values. Convert the datetime array to a timedelta relative to an offset. @@ -430,24 +431,68 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float): return np.where(isnull(array), np.nan, array.astype(dtype)) +def timedelta_to_numeric(array, datetime_unit="ns", dtype=float): + """Convert an array containing timedelta-like data to numerical values. 
+ """ + import datetime as dt + + if isinstance(array, dt.timedelta): + out = py_timedelta_to_float(array, datetime_unit) + elif isinstance(array, np.timedelta64): + out = np_timedelta64_to_float(array, datetime_unit) + elif isinstance(array, pd.Timedelta): + out = pd_timedelta_to_float(array, datetime_unit) + elif isinstance(array, pd.TimedeltaIndex): + out = pd_timedeltaindex_to_float(array, datetime_unit) + elif isinstance(array, str): + try: + a = pd.to_timedelta(array) + except ValueError: + raise ValueError( + f"Could not convert {array!r} to timedelta64 using pandas.to_timedelta" + ) + return py_timedelta_to_float(a, datetime_unit) + else: + raise TypeError( + f"Expected array of type str, pandas.Timedelta, pandas.TimedeltaIndex, " + f"datetime.timedelta or numpy.timedelta64, but received {type(array).__name__}" + ) + return out.astype(dtype) + + def _to_pytimedelta(array, unit="us"): index = pd.TimedeltaIndex(array.ravel(), unit=unit) return index.to_pytimedelta().reshape(array.shape) -def py_timedelta_to_float(array, datetime_unit="ns"): +def np_timedelta64_to_float(array, datetime_unit): + array = array.astype("timedelta64[us]") + conversion_factor = np.timedelta64(1, "us") / np.timedelta64(1, datetime_unit) + return conversion_factor * array + + +def pd_timedelta_to_float(array, datetime_units): + array = np.timedelta64(array.value, "ns").astype(np.float64) + return np_timedelta64_to_float(array, datetime_units) + + +def pd_timedeltaindex_to_float(array, datetime_units): + return np_timedelta64_to_float(array.values, datetime_units) + + +def py_timedelta_to_float(array, datetime_unit): """Convert a timedelta object to a float, possibly at a loss of resolution. Notes ----- - With Numpy >= 1.7, it's possible to convert directly from `datetime.timedelta` + With Numpy >= 1.17, it's possible to convert directly from `datetime.timedelta` to `numpy.timedelta64` at the microsecond (us) resolution. This covers a fairly large span of years. With earlier Numpy versions, the conversion only works at the nanosecond resolution, which restricts the span that can be covered. """ - if np.__version__ < "1.17": + if LooseVersion(np.__version__) < LooseVersion("1.17"): array = np.asarray(array) array = np.asarray(pd.Series(array.ravel())).reshape(array.shape) if array.dtype.kind in "O": diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 749f1ddea49..b20441e993c 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -2,6 +2,7 @@ from functools import partial from numbers import Number from typing import Any, Callable, Dict, Hashable, Sequence, Union +import datetime as dt import numpy as np import pandas as pd @@ -9,7 +10,7 @@ from . 
import utils from .common import _contains_datetime_like_objects, ones_like from .computation import apply_ufunc -from .duck_array_ops import dask_array_type, datetime_to_numeric, py_timedelta_to_float +from .duck_array_ops import dask_array_type, datetime_to_numeric, timedelta_to_numeric from .utils import OrderedSet, is_scalar from .variable import Variable, broadcast_variables @@ -265,6 +266,8 @@ def get_clean_interp_index(arr, dim: Hashable, use_coordinate: Union[str, bool] # Numerical datetime values are defined with respect to 1970-01-01T00:00:00 in units of nanoseconds if isinstance(index, (CFTimeIndex, pd.DatetimeIndex)): offset = type(index[0])(1970, 1, 1) + if isinstance(index, CFTimeIndex): + index = index.values index = Variable( data=datetime_to_numeric(index, offset=offset, datetime_unit="ns"), dims=(dim,), @@ -290,7 +293,7 @@ def interp_na( use_coordinate: Union[bool, str] = True, method: str = "linear", limit: int = None, - max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64] = None, + max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64, dt.timedelta] = None, **kwargs, ): """Interpolate values according to different methods. @@ -313,24 +316,8 @@ def interp_na( and isinstance(self.indexes[dim], (pd.DatetimeIndex, CFTimeIndex)) and use_coordinate ): - if not isinstance(max_gap, (np.timedelta64, pd.Timedelta, str)): - raise TypeError( - f"Underlying index is DatetimeIndex. Expected max_gap of type str, pandas.Timedelta or numpy.timedelta64 but received {max_type}" - ) - - if isinstance(max_gap, str): - try: - max_gap = pd.to_timedelta(max_gap) - except ValueError: - raise ValueError( - f"Could not convert {max_gap!r} to timedelta64 using pandas.to_timedelta" - ) - - if isinstance(max_gap, pd.Timedelta): - max_gap = np.timedelta64(max_gap.value, "ns") - # Convert to float - max_gap = py_timedelta_to_float(max_gap) + max_gap = timedelta_to_numeric(max_gap) if not use_coordinate: if not isinstance(max_gap, (Number, np.number)): diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index adb4897a868..ea647e15d92 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -700,10 +700,12 @@ def test_datetime_to_numeric_overflow(): import cftime times = pd.date_range("2000", periods=5, freq="7D").values.astype("datetime64[us]") - cftimes = cftime_range("2000", periods=5, freq="7D", calendar="standard").values + cftimes = cftime_range( + "2000", periods=5, freq="7D", calendar="proleptic_gregorian" + ).values offset = np.datetime64("0001-01-01") - cfoffset = cftime.DatetimeGregorian(1, 1, 1) + cfoffset = cftime.DatetimeProlepticGregorian(1, 1, 1) result = duck_array_ops.datetime_to_numeric( times, offset=offset, datetime_unit="D", dtype=int @@ -714,16 +716,20 @@ def test_datetime_to_numeric_overflow(): expected = 730119 + np.arange(0, 35, 7) - # Outputs are different. Expected ? 
np.testing.assert_array_equal(result, expected) - np.testing.assert_array_equal(cfresult, expected + 2) + np.testing.assert_array_equal(cfresult, expected) def test_py_datetime_to_float(): - assert py_timedelta_to_float(dt.timedelta(days=1)) == 86400 * 1e9 + assert py_timedelta_to_float(dt.timedelta(days=1), "ns") == 86400 * 1e9 if np.__version__ < "1.17": with pytest.raises(OverflowError): - py_timedelta_to_float(dt.timedelta(days=1e6)) + py_timedelta_to_float(dt.timedelta(days=1e6), "ns") else: - assert py_timedelta_to_float(dt.timedelta(days=1e6)) == 86400 * 1e15 + assert py_timedelta_to_float(dt.timedelta(days=1e6), "ps") == 86400 * 1e18 + assert py_timedelta_to_float(dt.timedelta(days=1e6), "ns") == 86400 * 1e15 + assert py_timedelta_to_float(dt.timedelta(days=1e6), "us") == 86400 * 1e12 + assert py_timedelta_to_float(dt.timedelta(days=1e6), "ms") == 86400 * 1e9 + assert py_timedelta_to_float(dt.timedelta(days=1e6), "s") == 86400 * 1e6 + assert py_timedelta_to_float(dt.timedelta(days=1e6), "D") == 1e6 diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py index d59f88c6f48..bc985fa37fc 100644 --- a/xarray/tests/test_interp.py +++ b/xarray/tests/test_interp.py @@ -666,7 +666,7 @@ def test_datetime_interp_noerror(): @pytest.mark.xfail( np.__version__ < "1.17", - reason="Numpy support for conversion from timedelta objects to " "timedelta64.", + reason="Numpy support for conversion from timedelta objects to timedelta64.", ) @requires_cftime def test_3641(): diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 1a29976d918..e868ca16c36 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -514,7 +514,6 @@ def test_get_clean_interp_index_dt(cf_da, calendar, freq): np.testing.assert_array_equal(gi, si) -@pytest.mark.xfail def test_get_clean_interp_index_overflow(): da = xr.DataArray( [0, 1, 2], @@ -542,7 +541,7 @@ def test_interpolate_na_max_gap_errors(da_time): da_time.interpolate_na("t", max_gap=(1,)) da_time["t"] = pd.date_range("2001-01-01", freq="H", periods=11) - with raises_regex(TypeError, "Underlying index is"): + with raises_regex(TypeError, "Expected array of type str"): da_time.interpolate_na("t", max_gap=1) with raises_regex(TypeError, "Expected integer or floating point"): From 6c9630ae59dfdafdbaa97a0a2f7f259c6685f08d Mon Sep 17 00:00:00 2001 From: David Huard Date: Fri, 17 Jan 2020 15:12:56 -0500 Subject: [PATCH 19/36] bypass pandas to convert timedeltas to floats. avoids overflow errors. 
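[Editor's note] A rough sketch of the overflow this commit sidesteps (assumed
illustration, not part of the patch): routing the conversion through pandas forces
nanosecond resolution, which cannot represent very large deltas, whereas
datetime.timedelta.total_seconds() can.

    import datetime as dt

    big = dt.timedelta(days=1_000_000)   # roughly 2700 years
    big.total_seconds() * 1e6            # 8.64e16 microseconds, fine as a float
    # The same span is 8.64e19 nanoseconds, beyond the int64 range (~9.22e18),
    # so a pandas/timedelta64[ns] round-trip either raises or falls back to
    # object dtype; total_seconds() avoids the nanosecond bottleneck entirely.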
--- xarray/core/duck_array_ops.py | 10 ++-------- xarray/tests/test_duck_array_ops.py | 19 +++++++------------ 2 files changed, 9 insertions(+), 20 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index bd187e0652f..78600cadeda 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -494,17 +494,11 @@ def py_timedelta_to_float(array, datetime_unit): """ if LooseVersion(np.__version__) < LooseVersion("1.17"): array = np.asarray(array) - array = np.asarray(pd.Series(array.ravel())).reshape(array.shape) - if array.dtype.kind in "O": - raise OverflowError("Could not convert timedelta to float.") - else: - array = array.astype(np.float64) # [ns] - res = "ns" + array = np.reshape([a.total_seconds() for a in array.ravel()], array.shape) * 1e6 else: array = np.asarray(array).astype("timedelta64[us]").astype(np.float64) # [us] - res = "us" - conversion_factor = np.timedelta64(1, res) / np.timedelta64(1, datetime_unit) + conversion_factor = np.timedelta64(1, "us") / np.timedelta64(1, datetime_unit) return conversion_factor * array diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index ea647e15d92..d18ea5bc8c7 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -720,16 +720,11 @@ def test_datetime_to_numeric_overflow(): np.testing.assert_array_equal(cfresult, expected) -def test_py_datetime_to_float(): +def test_py_timedelta_to_float(): assert py_timedelta_to_float(dt.timedelta(days=1), "ns") == 86400 * 1e9 - - if np.__version__ < "1.17": - with pytest.raises(OverflowError): - py_timedelta_to_float(dt.timedelta(days=1e6), "ns") - else: - assert py_timedelta_to_float(dt.timedelta(days=1e6), "ps") == 86400 * 1e18 - assert py_timedelta_to_float(dt.timedelta(days=1e6), "ns") == 86400 * 1e15 - assert py_timedelta_to_float(dt.timedelta(days=1e6), "us") == 86400 * 1e12 - assert py_timedelta_to_float(dt.timedelta(days=1e6), "ms") == 86400 * 1e9 - assert py_timedelta_to_float(dt.timedelta(days=1e6), "s") == 86400 * 1e6 - assert py_timedelta_to_float(dt.timedelta(days=1e6), "D") == 1e6 + assert py_timedelta_to_float(dt.timedelta(days=1e6), "ps") == 86400 * 1e18 + assert py_timedelta_to_float(dt.timedelta(days=1e6), "ns") == 86400 * 1e15 + assert py_timedelta_to_float(dt.timedelta(days=1e6), "us") == 86400 * 1e12 + assert py_timedelta_to_float(dt.timedelta(days=1e6), "ms") == 86400 * 1e9 + assert py_timedelta_to_float(dt.timedelta(days=1e6), "s") == 86400 * 1e6 + assert py_timedelta_to_float(dt.timedelta(days=1e6), "D") == 1e6 From d18c775be326efe50f91cbf89d8430d83b0533db Mon Sep 17 00:00:00 2001 From: David Huard Date: Fri, 17 Jan 2020 15:43:15 -0500 Subject: [PATCH 20/36] black --- xarray/core/duck_array_ops.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 78600cadeda..3d289215f6b 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -494,7 +494,9 @@ def py_timedelta_to_float(array, datetime_unit): """ if LooseVersion(np.__version__) < LooseVersion("1.17"): array = np.asarray(array) - array = np.reshape([a.total_seconds() for a in array.ravel()], array.shape) * 1e6 + array = ( + np.reshape([a.total_seconds() for a in array.ravel()], array.shape) * 1e6 + ) else: array = np.asarray(array).astype("timedelta64[us]").astype(np.float64) # [us] From 6615c975245681b4728ce2b12e1e44b8fb0dbb9c Mon Sep 17 00:00:00 2001 From: David Huard Date: Mon, 20 Jan 2020 
11:32:51 -0500 Subject: [PATCH 21/36] removed numpy conversion. added docstrings. renamed tests. --- xarray/core/duck_array_ops.py | 47 ++++++++++++++++++----------- xarray/tests/test_duck_array_ops.py | 5 +-- xarray/tests/test_interp.py | 4 --- xarray/tests/test_missing.py | 2 +- 4 files changed, 32 insertions(+), 26 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 926e37ad15e..021761a50bf 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -433,6 +433,17 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float): def timedelta_to_numeric(array, datetime_unit="ns", dtype=float): """Convert an array containing timedelta-like data to numerical values. + + Parameters + ---------- + array : datetime.timedelta, numpy.timedelta64, pandas.Timedelta, pandas.TimedeltaIndex, str + Time delta representation. + datetime_unit : {Y, M, W, D, h, m, s, ms, us, ns, ps, fs, as} + The time units of the output values. Note that some conversions are not allowed due to + non-linear relationships between units. + dtype : type + The output data type. + """ import datetime as dt @@ -466,40 +477,42 @@ def _to_pytimedelta(array, unit="us"): def np_timedelta64_to_float(array, datetime_unit): + """Convert numpy.timedelta64 to float. + + Notes + ----- + The array is first converted to microseconds, which is less likely to + cause overflow errors. + """ array = array.astype("timedelta64[us]") conversion_factor = np.timedelta64(1, "us") / np.timedelta64(1, datetime_unit) return conversion_factor * array def pd_timedelta_to_float(array, datetime_units): + """Convert pandas.Timedelta to float. + + Notes + ----- + Built on the assumption that pandas timedelta values are in nanoseconds, + which is also the numpy default resolution. + """ array = np.timedelta64(array.value, "ns").astype(np.float64) return np_timedelta64_to_float(array, datetime_units) def pd_timedeltaindex_to_float(array, datetime_units): + """Convert pandas.TimedeltaIndex to float.""" return np_timedelta64_to_float(array.values, datetime_units) def py_timedelta_to_float(array, datetime_unit): """Convert a timedelta object to a float, possibly at a loss of resolution. - - Notes - ----- - With Numpy >= 1.17, it's possible to convert directly from `datetime.timedelta` - to `numpy.timedelta64` at the microsecond (us) resolution. This covers a fairly - large span of years. - - With earlier Numpy versions, the conversion only works at the nanosecond resolution, - which restricts the span that can be covered. """ - if LooseVersion(np.__version__) < LooseVersion("1.17"): - array = np.asarray(array) - array = ( - np.reshape([a.total_seconds() for a in array.ravel()], array.shape) * 1e6 - ) - else: - array = np.asarray(array).astype("timedelta64[us]").astype(np.float64) # [us] - + array = np.asarray(array) + array = ( + np.reshape([a.total_seconds() for a in array.ravel()], array.shape) * 1e6 + ) conversion_factor = np.timedelta64(1, "us") / np.timedelta64(1, datetime_unit) return conversion_factor * array diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index d18ea5bc8c7..b4e057e2481 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -693,10 +693,7 @@ def test_datetime_to_numeric_cftime(): @requires_cftime -@pytest.mark.xfail( - np.__version__ < "1.17", reason="Limited Numpy support for timedelta conversion." 
-) -def test_datetime_to_numeric_overflow(): +def test_datetime_to_numeric_potential_overflow(): import cftime times = pd.date_range("2000", periods=5, freq="7D").values.astype("datetime64[us]") diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py index bc985fa37fc..e3af8b5873a 100644 --- a/xarray/tests/test_interp.py +++ b/xarray/tests/test_interp.py @@ -664,10 +664,6 @@ def test_datetime_interp_noerror(): a.interp(x=xi, time=xi.time) # should not raise an error -@pytest.mark.xfail( - np.__version__ < "1.17", - reason="Numpy support for conversion from timedelta objects to timedelta64.", -) @requires_cftime def test_3641(): times = xr.cftime_range("0001", periods=3, freq="500Y") diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index e868ca16c36..a947b1b4ab0 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -514,7 +514,7 @@ def test_get_clean_interp_index_dt(cf_da, calendar, freq): np.testing.assert_array_equal(gi, si) -def test_get_clean_interp_index_overflow(): +def test_get_clean_interp_index_potential_overflow(): da = xr.DataArray( [0, 1, 2], dims=("time",), From 2df2b294896acf08ada151059ca1ea8086ee0b60 Mon Sep 17 00:00:00 2001 From: David Huard Date: Mon, 20 Jan 2020 11:33:57 -0500 Subject: [PATCH 22/36] pep8 --- xarray/core/duck_array_ops.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 021761a50bf..8098badc7a5 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -7,7 +7,6 @@ import inspect import warnings from functools import partial -from distutils.version import LooseVersion import numpy as np import pandas as pd @@ -510,9 +509,7 @@ def py_timedelta_to_float(array, datetime_unit): """Convert a timedelta object to a float, possibly at a loss of resolution. """ array = np.asarray(array) - array = ( - np.reshape([a.total_seconds() for a in array.ravel()], array.shape) * 1e6 - ) + array = np.reshape([a.total_seconds() for a in array.ravel()], array.shape) * 1e6 conversion_factor = np.timedelta64(1, "us") / np.timedelta64(1, datetime_unit) return conversion_factor * array From 31f541743c4a03c7c9aa4f6aaf3a07ee06f04654 Mon Sep 17 00:00:00 2001 From: David Huard Date: Mon, 20 Jan 2020 11:53:06 -0500 Subject: [PATCH 23/36] updated whats new --- doc/whats-new.rst | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index fef1b988f01..4e48d4098ba 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -23,8 +23,8 @@ Breaking changes ~~~~~~~~~~~~~~~~ - Remove ``compat`` and ``encoding`` kwargs from ``DataArray``, which - have been deprecated since 0.12. (:pull:`3650`). - Instead, specify the encoding when writing to disk or set + have been deprecated since 0.12. (:pull:`3650`). + Instead, specify the encoding when writing to disk or set the ``encoding`` attribute directly. By `Maximilian Roos `_ @@ -48,10 +48,14 @@ New Features - :py:meth:`Dataset.swap_dims` and :py:meth:`DataArray.swap_dims` now allow swapping to dimension names that don't exist yet. (:pull:`3636`) By `Justus Magin `_. -- Extend :py:class:`core.accessor_dt.DatetimeAccessor` properties - and support `.dt` accessor for timedelta +- Extend :py:class:`core.accessor_dt.DatetimeAccessor` properties + and support `.dt` accessor for timedelta via :py:class:`core.accessor_dt.TimedeltaAccessor` (:pull:`3612`) By `Anderson Banihirwe `_. 
+- Define 1970-01-01 as the default offset for the interpolation index for both + normal and CF-time indexes, use microseconds in the conversion from timedelta + objects to floats to avoid overflow errors. (:issue:`3641`, :pull:`3631`) + By David Huard ``_. Bug fixes ~~~~~~~~~ From 2974af9d576d13b046f255e2d2c3138dd9823373 Mon Sep 17 00:00:00 2001 From: David Huard Date: Mon, 20 Jan 2020 13:30:53 -0500 Subject: [PATCH 24/36] Update doc/whats-new.rst Co-Authored-By: Spencer Clark --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4e48d4098ba..e6f44f44b6e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -53,7 +53,7 @@ New Features via :py:class:`core.accessor_dt.TimedeltaAccessor` (:pull:`3612`) By `Anderson Banihirwe `_. - Define 1970-01-01 as the default offset for the interpolation index for both - normal and CF-time indexes, use microseconds in the conversion from timedelta + normal and CF-time indexes in :py:meth:`DataArray.interpolate_na`, use microseconds in the conversion from timedelta objects to floats to avoid overflow errors. (:issue:`3641`, :pull:`3631`) By David Huard ``_. From eeb50749acaae4df65aa1d478cc3875fbb0c9298 Mon Sep 17 00:00:00 2001 From: David Huard Date: Mon, 20 Jan 2020 14:01:02 -0500 Subject: [PATCH 25/36] update interpolate_na docstrings --- doc/whats-new.rst | 7 ++++--- xarray/core/dataarray.py | 6 ++++-- xarray/core/dataset.py | 6 ++++-- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index e6f44f44b6e..26b42a1df21 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -52,9 +52,10 @@ New Features and support `.dt` accessor for timedelta via :py:class:`core.accessor_dt.TimedeltaAccessor` (:pull:`3612`) By `Anderson Banihirwe `_. -- Define 1970-01-01 as the default offset for the interpolation index for both - normal and CF-time indexes in :py:meth:`DataArray.interpolate_na`, use microseconds in the conversion from timedelta - objects to floats to avoid overflow errors. (:issue:`3641`, :pull:`3631`) +- Support CFTimeIndex in :py:meth:`DataArray.interpolate_na`, define 1970-01-01 + as the default offset for the interpolation index for both DatetimeIndex and + CFTimeIndex, use microseconds in the conversion from timedelta objects + to floats to avoid overflow errors (:issue:`3641`, :pull:`3631`). By David Huard ``_. Bug fixes diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 0e67a791834..f0598832da1 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -18,6 +18,7 @@ cast, ) +import datetime as dt import numpy as np import pandas as pd @@ -2041,7 +2042,7 @@ def interpolate_na( method: str = "linear", limit: int = None, use_coordinate: Union[bool, str] = True, - max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64] = None, + max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64, dt.timedelta] = None, **kwargs: Any, ) -> "DataArray": """Fill in NaNs by interpolating according to different methods. @@ -2073,7 +2074,7 @@ def interpolate_na( or None for no limit. This filling is done regardless of the size of the gap in the data. To only interpolate over gaps less than a given length, see ``max_gap``. - max_gap: int, float, str, pandas.Timedelta, numpy.timedelta64, default None. + max_gap: int, float, str, pandas.Timedelta, numpy.timedelta64, datetime.timedelta, default None. Maximum size of gap, a continuous sequence of NaNs, that will be filled. Use None for no limit. 
When interpolating along a datetime64 dimension and ``use_coordinate=True``, ``max_gap`` can be one of the following: @@ -2081,6 +2082,7 @@ def interpolate_na( - a string that is valid input for pandas.to_timedelta - a :py:class:`numpy.timedelta64` object - a :py:class:`pandas.Timedelta` object + - a :py:class:`datetime.timedelta` object Otherwise, ``max_gap`` must be an int or a float. Use of ``max_gap`` with unlabeled dimensions has not been implemented yet. Gap length is defined as the difference diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 82ddc8a535f..0a75e35d2ed 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -27,6 +27,7 @@ cast, ) +import datetime as dt import numpy as np import pandas as pd @@ -3994,7 +3995,7 @@ def interpolate_na( method: str = "linear", limit: int = None, use_coordinate: Union[bool, Hashable] = True, - max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64] = None, + max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64, dt.timedelta] = None, **kwargs: Any, ) -> "Dataset": """Fill in NaNs by interpolating according to different methods. @@ -4027,7 +4028,7 @@ def interpolate_na( or None for no limit. This filling is done regardless of the size of the gap in the data. To only interpolate over gaps less than a given length, see ``max_gap``. - max_gap: int, float, str, pandas.Timedelta, numpy.timedelta64, default None. + max_gap: int, float, str, pandas.Timedelta, numpy.timedelta64, datetime.timedelta, default None. Maximum size of gap, a continuous sequence of NaNs, that will be filled. Use None for no limit. When interpolating along a datetime64 dimension and ``use_coordinate=True``, ``max_gap`` can be one of the following: @@ -4035,6 +4036,7 @@ def interpolate_na( - a string that is valid input for pandas.to_timedelta - a :py:class:`numpy.timedelta64` object - a :py:class:`pandas.Timedelta` object + - a :py:class:`datetime.timedelta` object Otherwise, ``max_gap`` must be an int or a float. Use of ``max_gap`` with unlabeled dimensions has not been implemented yet. Gap length is defined as the difference From 6b9631f94f8420822b86eeca22aff8888778d46f Mon Sep 17 00:00:00 2001 From: David Huard Date: Mon, 20 Jan 2020 14:01:38 -0500 Subject: [PATCH 26/36] black --- xarray/core/dataarray.py | 4 +++- xarray/core/dataset.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index f0598832da1..8008929c802 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2042,7 +2042,9 @@ def interpolate_na( method: str = "linear", limit: int = None, use_coordinate: Union[bool, str] = True, - max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64, dt.timedelta] = None, + max_gap: Union[ + int, float, str, pd.Timedelta, np.timedelta64, dt.timedelta + ] = None, **kwargs: Any, ) -> "DataArray": """Fill in NaNs by interpolating according to different methods. diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 0a75e35d2ed..ce7c09dab70 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3995,7 +3995,9 @@ def interpolate_na( method: str = "linear", limit: int = None, use_coordinate: Union[bool, Hashable] = True, - max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64, dt.timedelta] = None, + max_gap: Union[ + int, float, str, pd.Timedelta, np.timedelta64, dt.timedelta + ] = None, **kwargs: Any, ) -> "Dataset": """Fill in NaNs by interpolating according to different methods. 
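As an illustration of what the ``max_gap`` changes above enable: the sketch below is not code from this series — the data values, coordinate and variable names are assumptions made for the example — but the call path is the one documented above, where a datetime-like ``max_gap`` is converted through ``timedelta_to_numeric``.

    import datetime
    import numpy as np
    import pandas as pd
    import xarray as xr

    # Daily series with a single-NaN gap and a four-NaN gap.
    times = pd.date_range("2001-01-01", periods=11, freq="D")
    da = xr.DataArray(
        [0.0, np.nan, 2.0, 3.0, np.nan, np.nan, np.nan, np.nan, 8.0, 9.0, 10.0],
        dims=("time",),
        coords={"time": times},
    )

    # Gap length is the distance between the valid points on either side of a gap:
    # the single NaN spans 2 days, the four-NaN run spans 5 days. With a two-day
    # max_gap only the first gap should be interpolated; the longer one stays NaN.
    filled = da.interpolate_na(dim="time", max_gap=datetime.timedelta(days=2))

Per the docstring above, passing ``max_gap="2 days"``, ``np.timedelta64(2, "D")`` or ``pd.Timedelta("2D")`` should behave the same way, since every accepted type is routed through the same conversion.
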
From 5656fdb62e9392381b20e46fc8ba2403ae151cd8 Mon Sep 17 00:00:00 2001 From: David Huard Date: Mon, 20 Jan 2020 14:24:51 -0500 Subject: [PATCH 27/36] dt conflicts with accessor --- xarray/core/dataarray.py | 4 ++-- xarray/core/dataset.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 8008929c802..b65afc59892 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -18,7 +18,7 @@ cast, ) -import datetime as dt +import datetime import numpy as np import pandas as pd @@ -2043,7 +2043,7 @@ def interpolate_na( limit: int = None, use_coordinate: Union[bool, str] = True, max_gap: Union[ - int, float, str, pd.Timedelta, np.timedelta64, dt.timedelta + int, float, str, pd.Timedelta, np.timedelta64, datetime.timedelta ] = None, **kwargs: Any, ) -> "DataArray": diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index ce7c09dab70..033d227171e 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -27,7 +27,7 @@ cast, ) -import datetime as dt +import datetime import numpy as np import pandas as pd @@ -3996,7 +3996,7 @@ def interpolate_na( limit: int = None, use_coordinate: Union[bool, Hashable] = True, max_gap: Union[ - int, float, str, pd.Timedelta, np.timedelta64, dt.timedelta + int, float, str, pd.Timedelta, np.timedelta64, datetime.timedelta ] = None, **kwargs: Any, ) -> "Dataset": From dcf98ffa339ed3a4301a0a9b3debffc337633310 Mon Sep 17 00:00:00 2001 From: David Huard Date: Fri, 24 Jan 2020 15:27:28 -0500 Subject: [PATCH 28/36] replaced assert_equal by assert_allclose --- xarray/tests/test_missing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index a947b1b4ab0..fcb9a2e1e8c 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -16,6 +16,7 @@ from xarray.tests import ( assert_array_equal, assert_equal, + assert_allclose, raises_regex, requires_bottleneck, requires_dask, @@ -565,7 +566,7 @@ def test_interpolate_na_max_gap_time_specifier( da_time.copy(data=[np.nan, 1, 2, 3, 4, 5, np.nan, np.nan, np.nan, np.nan, 10]) ) actual = transform(da_time).interpolate_na("t", max_gap=max_gap) - assert_equal(actual, expected) + assert_allclose(actual, expected) @requires_bottleneck From 4842a9627a76f14abbc68d64603b85e19b1eb661 Mon Sep 17 00:00:00 2001 From: David Huard Date: Sat, 25 Jan 2020 16:50:04 -0500 Subject: [PATCH 29/36] Update xarray/core/duck_array_ops.py Co-Authored-By: Spencer Clark --- xarray/core/duck_array_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 8098badc7a5..67e462901c9 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -483,7 +483,7 @@ def np_timedelta64_to_float(array, datetime_unit): The array is first converted to microseconds, which is less likely to cause overflow errors. 
""" - array = array.astype("timedelta64[us]") + array = array.astype("timedelta64[ns]").astype(np.float64) conversion_factor = np.timedelta64(1, "us") / np.timedelta64(1, datetime_unit) return conversion_factor * array From 6dbf225232ca961abc1d6efd400343bce8c95342 Mon Sep 17 00:00:00 2001 From: David Huard Date: Sat, 25 Jan 2020 16:50:57 -0500 Subject: [PATCH 30/36] Update xarray/core/duck_array_ops.py Co-Authored-By: Spencer Clark --- xarray/core/duck_array_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 67e462901c9..e95c6b9d5d5 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -496,7 +496,7 @@ def pd_timedelta_to_float(array, datetime_units): Built on the assumption that pandas timedelta values are in nanoseconds, which is also the numpy default resolution. """ - array = np.timedelta64(array.value, "ns").astype(np.float64) + array = array.to_timedelta64() return np_timedelta64_to_float(array, datetime_units) From c90dc977e718db139f0542025a5853e445eeb8be Mon Sep 17 00:00:00 2001 From: David Huard Date: Sat, 25 Jan 2020 17:55:39 -0500 Subject: [PATCH 31/36] renamed array to value in timedelta_to_numeric. Added tests --- xarray/core/duck_array_ops.py | 34 ++++++++++++++--------------- xarray/tests/test_duck_array_ops.py | 20 +++++++++++++++++ xarray/tests/test_missing.py | 2 +- 3 files changed, 38 insertions(+), 18 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index e95c6b9d5d5..0156a0a34a1 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -430,12 +430,12 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float): return np.where(isnull(array), np.nan, array.astype(dtype)) -def timedelta_to_numeric(array, datetime_unit="ns", dtype=float): - """Convert an array containing timedelta-like data to numerical values. +def timedelta_to_numeric(value, datetime_unit="ns", dtype=float): + """Convert a timedelta-like object to numerical values. Parameters ---------- - array : datetime.timedelta, numpy.timedelta64, pandas.Timedelta, pandas.TimedeltaIndex, str + value : datetime.timedelta, numpy.timedelta64, pandas.Timedelta, pandas.TimedeltaIndex, str Time delta representation. datetime_unit : {Y, M, W, D, h, m, s, ms, us, ns, ps, fs, as} The time units of the output values. 
Note that some conversions are not allowed due to @@ -446,26 +446,26 @@ def timedelta_to_numeric(array, datetime_unit="ns", dtype=float): """ import datetime as dt - if isinstance(array, dt.timedelta): - out = py_timedelta_to_float(array, datetime_unit) - elif isinstance(array, np.timedelta64): - out = np_timedelta64_to_float(array, datetime_unit) - elif isinstance(array, pd.Timedelta): - out = pd_timedelta_to_float(array, datetime_unit) - elif isinstance(array, pd.TimedeltaIndex): - out = pd_timedeltaindex_to_float(array, datetime_unit) - elif isinstance(array, str): + if isinstance(value, dt.timedelta): + out = py_timedelta_to_float(value, datetime_unit) + elif isinstance(value, np.timedelta64): + out = np_timedelta64_to_float(value, datetime_unit) + elif isinstance(value, pd.Timedelta): + out = pd_timedelta_to_float(value, datetime_unit) + elif isinstance(value, pd.TimedeltaIndex): + out = pd_timedeltaindex_to_float(value, datetime_unit) + elif isinstance(value, str): try: - a = pd.to_timedelta(array) + a = pd.to_timedelta(value) except ValueError: raise ValueError( - f"Could not convert {array!r} to timedelta64 using pandas.to_timedelta" + f"Could not convert {value!r} to timedelta64 using pandas.to_timedelta" ) return py_timedelta_to_float(a, datetime_unit) else: raise TypeError( - f"Expected array of type str, pandas.Timedelta, pandas.TimedeltaIndex, " - f"datetime.timedelta or numpy.timedelta64, but received {type(array).__name__}" + f"Expected value of type str, pandas.Timedelta, pandas.TimedeltaIndex, " + f"datetime.timedelta or numpy.timedelta64, but received {type(value).__name__}" ) return out.astype(dtype) @@ -484,7 +484,7 @@ def np_timedelta64_to_float(array, datetime_unit): cause overflow errors. """ array = array.astype("timedelta64[ns]").astype(np.float64) - conversion_factor = np.timedelta64(1, "us") / np.timedelta64(1, datetime_unit) + conversion_factor = np.timedelta64(1, "ns") / np.timedelta64(1, datetime_unit) return conversion_factor * array diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index b4e057e2481..44f0433e3d1 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -21,6 +21,7 @@ stack, where, py_timedelta_to_float, + timedelta_to_numeric ) from xarray.core.pycompat import dask_array_type from xarray.testing import assert_allclose, assert_equal @@ -725,3 +726,22 @@ def test_py_timedelta_to_float(): assert py_timedelta_to_float(dt.timedelta(days=1e6), "ms") == 86400 * 1e9 assert py_timedelta_to_float(dt.timedelta(days=1e6), "s") == 86400 * 1e6 assert py_timedelta_to_float(dt.timedelta(days=1e6), "D") == 1e6 + +@pytest.mark.parametrize( + "td", + [ + dt.timedelta(days=1), + np.timedelta64(1, "D"), + pd.Timedelta(1, "D"), + pd.TimedeltaIndex([1, ], "D"), + "1 day", + ], +) +def test_timedelta_to_numeric(td): + # Scalar input + out = timedelta_to_numeric(td, "ns") + np.testing.assert_allclose(out, 86400 * 1e9) + if isinstance(td, pd.TimedeltaIndex): + assert isinstance(out[0], float) + else: + assert isinstance(out, float) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index fcb9a2e1e8c..8d70d9a0fcc 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -542,7 +542,7 @@ def test_interpolate_na_max_gap_errors(da_time): da_time.interpolate_na("t", max_gap=(1,)) da_time["t"] = pd.date_range("2001-01-01", freq="H", periods=11) - with raises_regex(TypeError, "Expected array of type str"): + with raises_regex(TypeError, "Expected value of 
type str"): da_time.interpolate_na("t", max_gap=1) with raises_regex(TypeError, "Expected integer or floating point"): From 71fb87df7784bda5471662a59f32a625ca4d0784 Mon Sep 17 00:00:00 2001 From: David Huard Date: Sat, 25 Jan 2020 18:00:34 -0500 Subject: [PATCH 32/36] removed support for TimedeltaIndex in timedelta_to_numeric --- xarray/core/duck_array_ops.py | 8 +++----- xarray/tests/test_duck_array_ops.py | 6 +----- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 0156a0a34a1..e4210c2fbfb 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -435,7 +435,7 @@ def timedelta_to_numeric(value, datetime_unit="ns", dtype=float): Parameters ---------- - value : datetime.timedelta, numpy.timedelta64, pandas.Timedelta, pandas.TimedeltaIndex, str + value : datetime.timedelta, numpy.timedelta64, pandas.Timedelta, str Time delta representation. datetime_unit : {Y, M, W, D, h, m, s, ms, us, ns, ps, fs, as} The time units of the output values. Note that some conversions are not allowed due to @@ -452,8 +452,6 @@ def timedelta_to_numeric(value, datetime_unit="ns", dtype=float): out = np_timedelta64_to_float(value, datetime_unit) elif isinstance(value, pd.Timedelta): out = pd_timedelta_to_float(value, datetime_unit) - elif isinstance(value, pd.TimedeltaIndex): - out = pd_timedeltaindex_to_float(value, datetime_unit) elif isinstance(value, str): try: a = pd.to_timedelta(value) @@ -464,8 +462,8 @@ def timedelta_to_numeric(value, datetime_unit="ns", dtype=float): return py_timedelta_to_float(a, datetime_unit) else: raise TypeError( - f"Expected value of type str, pandas.Timedelta, pandas.TimedeltaIndex, " - f"datetime.timedelta or numpy.timedelta64, but received {type(value).__name__}" + f"Expected value of type str, pandas.Timedelta, datetime.timedelta " + f"or numpy.timedelta64, but received {type(value).__name__}" ) return out.astype(dtype) diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index 44f0433e3d1..2f0c9949108 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -733,7 +733,6 @@ def test_py_timedelta_to_float(): dt.timedelta(days=1), np.timedelta64(1, "D"), pd.Timedelta(1, "D"), - pd.TimedeltaIndex([1, ], "D"), "1 day", ], ) @@ -741,7 +740,4 @@ def test_timedelta_to_numeric(td): # Scalar input out = timedelta_to_numeric(td, "ns") np.testing.assert_allclose(out, 86400 * 1e9) - if isinstance(td, pd.TimedeltaIndex): - assert isinstance(out[0], float) - else: - assert isinstance(out, float) + assert isinstance(out, float) From 3d9f3330cfc4bcb4114e702e64319d2ef097d423 Mon Sep 17 00:00:00 2001 From: David Huard Date: Sun, 26 Jan 2020 03:53:09 -0500 Subject: [PATCH 33/36] added tests for np_timedelta64_to_float and pd_timedelta_to_float. renamed array to value for pd_timedelta_to_float. removed pd_timedeltaindex_to_float. --- xarray/core/duck_array_ops.py | 11 +++-------- xarray/tests/test_duck_array_ops.py | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index e4210c2fbfb..c2fe604a9d3 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -486,7 +486,7 @@ def np_timedelta64_to_float(array, datetime_unit): return conversion_factor * array -def pd_timedelta_to_float(array, datetime_units): +def pd_timedelta_to_float(value, datetime_unit): """Convert pandas.Timedelta to float. 
Notes @@ -494,13 +494,8 @@ def pd_timedelta_to_float(array, datetime_units): Built on the assumption that pandas timedelta values are in nanoseconds, which is also the numpy default resolution. """ - array = array.to_timedelta64() - return np_timedelta64_to_float(array, datetime_units) - - -def pd_timedeltaindex_to_float(array, datetime_units): - """Convert pandas.TimedeltaIndex to float.""" - return np_timedelta64_to_float(array.values, datetime_units) + value = value.to_timedelta64() + return np_timedelta64_to_float(value, datetime_unit) def py_timedelta_to_float(array, datetime_unit): diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index 2f0c9949108..2db145f1ecc 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -21,6 +21,8 @@ stack, where, py_timedelta_to_float, + np_timedelta64_to_float, + pd_timedelta_to_float, timedelta_to_numeric ) from xarray.core.pycompat import dask_array_type @@ -727,6 +729,24 @@ def test_py_timedelta_to_float(): assert py_timedelta_to_float(dt.timedelta(days=1e6), "s") == 86400 * 1e6 assert py_timedelta_to_float(dt.timedelta(days=1e6), "D") == 1e6 + +@pytest.mark.parametrize("td, expected", ([np.timedelta64(1, "D"), 86400*1E9], [np.timedelta64(1, "ns"), 1.])) +def test_np_timedelta64_to_float(td, expected): + out = np_timedelta64_to_float(td, datetime_unit="ns") + np.testing.assert_allclose(out, expected) + assert isinstance(out, float) + + out = np_timedelta64_to_float(np.atleast_1d(td), datetime_unit="ns") + np.testing.assert_allclose(out, expected) + + +@pytest.mark.parametrize("td, expected", ([pd.Timedelta(1, "D"), 86400*1E9], [pd.Timedelta(1, "ns"), 1.])) +def test_pd_timedelta_to_float(td, expected): + out = pd_timedelta_to_float(td, datetime_unit="ns") + np.testing.assert_allclose(out, expected) + assert isinstance(out, float) + + @pytest.mark.parametrize( "td", [ From b04785c374da1384e4f710f4a71c372c726e78db Mon Sep 17 00:00:00 2001 From: David Huard Date: Sun, 26 Jan 2020 03:54:15 -0500 Subject: [PATCH 34/36] black --- xarray/tests/test_duck_array_ops.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index 2db145f1ecc..ceb20f9a724 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -23,7 +23,7 @@ py_timedelta_to_float, np_timedelta64_to_float, pd_timedelta_to_float, - timedelta_to_numeric + timedelta_to_numeric, ) from xarray.core.pycompat import dask_array_type from xarray.testing import assert_allclose, assert_equal @@ -730,7 +730,10 @@ def test_py_timedelta_to_float(): assert py_timedelta_to_float(dt.timedelta(days=1e6), "D") == 1e6 -@pytest.mark.parametrize("td, expected", ([np.timedelta64(1, "D"), 86400*1E9], [np.timedelta64(1, "ns"), 1.])) +@pytest.mark.parametrize( + "td, expected", + ([np.timedelta64(1, "D"), 86400 * 1e9], [np.timedelta64(1, "ns"), 1.0]), +) def test_np_timedelta64_to_float(td, expected): out = np_timedelta64_to_float(td, datetime_unit="ns") np.testing.assert_allclose(out, expected) @@ -740,7 +743,9 @@ def test_np_timedelta64_to_float(td, expected): np.testing.assert_allclose(out, expected) -@pytest.mark.parametrize("td, expected", ([pd.Timedelta(1, "D"), 86400*1E9], [pd.Timedelta(1, "ns"), 1.])) +@pytest.mark.parametrize( + "td, expected", ([pd.Timedelta(1, "D"), 86400 * 1e9], [pd.Timedelta(1, "ns"), 1.0]) +) def test_pd_timedelta_to_float(td, expected): out = pd_timedelta_to_float(td, 
datetime_unit="ns") np.testing.assert_allclose(out, expected) @@ -749,12 +754,7 @@ def test_pd_timedelta_to_float(td, expected): @pytest.mark.parametrize( "td", - [ - dt.timedelta(days=1), - np.timedelta64(1, "D"), - pd.Timedelta(1, "D"), - "1 day", - ], + [dt.timedelta(days=1), np.timedelta64(1, "D"), pd.Timedelta(1, "D"), "1 day",], ) def test_timedelta_to_numeric(td): # Scalar input From d24cae41a230292b3b225d454006aa3c0d430b0c Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sun, 26 Jan 2020 08:24:05 -0500 Subject: [PATCH 35/36] Fix flake8 error --- xarray/tests/test_duck_array_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index ceb20f9a724..7c5c284a04b 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -754,7 +754,7 @@ def test_pd_timedelta_to_float(td, expected): @pytest.mark.parametrize( "td", - [dt.timedelta(days=1), np.timedelta64(1, "D"), pd.Timedelta(1, "D"), "1 day",], + [dt.timedelta(days=1), np.timedelta64(1, "D"), pd.Timedelta(1, "D"), "1 day"], ) def test_timedelta_to_numeric(td): # Scalar input From 6f0c5042c955ba26adceaa6fb3c1db665204ca38 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sun, 26 Jan 2020 08:27:02 -0500 Subject: [PATCH 36/36] black --- xarray/tests/test_duck_array_ops.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index 7c5c284a04b..96c883baa67 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -753,8 +753,7 @@ def test_pd_timedelta_to_float(td, expected): @pytest.mark.parametrize( - "td", - [dt.timedelta(days=1), np.timedelta64(1, "D"), pd.Timedelta(1, "D"), "1 day"], + "td", [dt.timedelta(days=1), np.timedelta64(1, "D"), pd.Timedelta(1, "D"), "1 day"], ) def test_timedelta_to_numeric(td): # Scalar input