From db94bb9d5f80ae613f7c33cddca89a30e393aa57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Thu, 31 Aug 2023 12:45:30 +0200 Subject: [PATCH 01/17] preserve nanosecond resolution when encoding/decoding times. --- xarray/backends/netcdf3.py | 7 ++++ xarray/coding/times.py | 70 ++++++++++++++++++++++++++++---------- xarray/coding/variables.py | 23 +++++++++++-- 3 files changed, 79 insertions(+), 21 deletions(-) diff --git a/xarray/backends/netcdf3.py b/xarray/backends/netcdf3.py index ef389eefc90..fd741ce935e 100644 --- a/xarray/backends/netcdf3.py +++ b/xarray/backends/netcdf3.py @@ -62,6 +62,13 @@ def coerce_nc3_dtype(arr): dtype = str(arr.dtype) if dtype in _nc3_dtype_coercions: new_dtype = _nc3_dtype_coercions[dtype] + # check if this looks like a time with NaT + # and transform to float64 + if np.issubdtype(dtype, np.int64): + mask = arr == np.iinfo(np.int64).min + if mask.any(): + arr = np.where(mask, np.nan, arr) + return arr # TODO: raise a warning whenever casting the data-type instead? cast_arr = arr.astype(new_dtype) if not (cast_arr == arr).all(): diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 3745d61acc0..af98f219b78 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -171,6 +171,20 @@ def _unpack_netcdf_time_units(units: str) -> tuple[str, str]: return delta_units, ref_date +def _unpack_delta_ref_date(units): + # same us _unpack_netcdf_time_units but finalizes ref_date for + # processing in encode_cf_datetime + delta, _ref_date = _unpack_netcdf_time_units(units) + # TODO: the strict enforcement of nanosecond precision Timestamps can be + # relaxed when addressing GitHub issue #7493. + ref_date = nanosecond_precision_timestamp(_ref_date) + # If the ref_date Timestamp is timezone-aware, convert to UTC and + # make it timezone-naive (GH 2649). 
+ if ref_date.tz is not None: + ref_date = ref_date.tz_convert(None) + return delta, ref_date + + def _decode_cf_datetime_dtype( data, units: str, calendar: str, use_cftime: bool | None ) -> np.dtype: @@ -251,9 +265,12 @@ def _decode_datetime_with_pandas( # Cast input ordinals to integers of nanoseconds because pd.to_timedelta # works much faster when dealing with integers (GH 1399). - flat_num_dates_ns_int = (flat_num_dates * _NS_PER_TIME_DELTA[delta]).astype( - np.int64 - ) + # properly handle NaN/NaT to prevent casting NaN to int + nan = np.isnan(flat_num_dates) | (flat_num_dates == np.iinfo(np.int64).min) + flat_num_dates = flat_num_dates * _NS_PER_TIME_DELTA[delta] + flat_num_dates_ns_int = np.zeros_like(flat_num_dates, dtype=np.int64) + flat_num_dates_ns_int[nan] = np.iinfo(np.int64).min + flat_num_dates_ns_int[~nan] = flat_num_dates[~nan].astype(np.int64) # Use pd.to_timedelta to safely cast integer values to timedeltas, # and add those to a Timestamp to safely produce a DatetimeIndex. 
This @@ -575,6 +592,9 @@ def _should_cftime_be_used( def _cleanup_netcdf_time_units(units: str) -> str: delta, ref_date = _unpack_netcdf_time_units(units) + delta = delta.lower() + if not delta.endswith("s"): + delta = f"{delta}s" try: units = f"{delta} since {format_timestamp(ref_date)}" except (OutOfBoundsDatetime, ValueError): @@ -635,32 +655,41 @@ def encode_cf_datetime( """ dates = np.asarray(dates) + data_units = infer_datetime_units(dates) + if units is None: - units = infer_datetime_units(dates) + units = data_units else: units = _cleanup_netcdf_time_units(units) if calendar is None: calendar = infer_calendar_name(dates) - delta, _ref_date = _unpack_netcdf_time_units(units) try: if not _is_standard_calendar(calendar) or dates.dtype.kind == "O": # parse with cftime instead raise OutOfBoundsDatetime assert dates.dtype == "datetime64[ns]" + delta, ref_date = _unpack_delta_ref_date(units) delta_units = _netcdf_to_numpy_timeunit(delta) time_delta = np.timedelta64(1, delta_units).astype("timedelta64[ns]") - # TODO: the strict enforcement of nanosecond precision Timestamps can be - # relaxed when addressing GitHub issue #7493. - ref_date = nanosecond_precision_timestamp(_ref_date) - - # If the ref_date Timestamp is timezone-aware, convert to UTC and - # make it timezone-naive (GH 2649). - if ref_date.tz is not None: - ref_date = ref_date.tz_convert(None) + # check if times can be represented with given units + if data_units != units: + data_delta, data_ref_date = _unpack_delta_ref_date(data_units) + needed_delta = _infer_time_units_from_diff( + (data_ref_date - ref_date).to_timedelta64() + ) + needed_time_delta = np.timedelta64( + 1, _netcdf_to_numpy_timeunit(needed_delta) + ).astype("timedelta64[ns]") + if needed_delta != delta and time_delta > needed_time_delta: + warnings.warn( + f"Times can't be serialized faithfully with requested units {units!r}. " + f"Resolution of {needed_delta!r} needed. " + f"Serializing timeseries to floating point." 
+ ) # Wrap the dates in a DatetimeIndex to do the subtraction to ensure # an OverflowError is raised if the ref_date is too far away from @@ -670,8 +699,12 @@ def encode_cf_datetime( # Use floor division if time_delta evenly divides all differences # to preserve integer dtype if possible (GH 4045). - if np.all(time_deltas % time_delta == np.timedelta64(0, "ns")): - num = time_deltas // time_delta + # NaT prevents us from using datetime64 directly, but we can safely coerce + # to int64 in presence of NaT, so we just dropna before check (GH 7817). + if np.all(time_deltas.dropna() % time_delta == np.timedelta64(0, "ns")): + # calculate int64 floor division + num = time_deltas // time_delta.astype(np.int64) + num = num.astype(np.int64, copy=False) else: num = time_deltas / time_delta num = num.values.reshape(dates.shape) @@ -704,9 +737,10 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: ) or contains_cftime_datetimes(variable): dims, data, attrs, encoding = unpack_for_encoding(variable) - (data, units, calendar) = encode_cf_datetime( - data, encoding.pop("units", None), encoding.pop("calendar", None) - ) + units = encoding.pop("units", None) + calendar = encoding.pop("calendar", None) + (data, units, calendar) = encode_cf_datetime(data, units, calendar) + safe_setitem(attrs, "units", units, name=name) safe_setitem(attrs, "calendar", calendar, name=name) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 8ba7dcbb0e2..c5dcfc7d9c5 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -236,19 +236,32 @@ def encode(self, variable: Variable, name: T_Name = None): f"Variable {name!r} has conflicting _FillValue ({fv}) and missing_value ({mv}). Cannot encode data." 
) + # special case DateTime to properly handle NaT + is_date = "since" in attrs.get("units", "") + if fv_exists: # Ensure _FillValue is cast to same dtype as data's encoding["_FillValue"] = dtype.type(fv) fill_value = pop_to(encoding, attrs, "_FillValue", name=name) if not pd.isnull(fill_value): - data = duck_array_ops.fillna(data, fill_value) + if is_date: + data = duck_array_ops.where( + data != np.iinfo(np.int64).min, data, fill_value + ) + else: + data = duck_array_ops.fillna(data, fill_value) if mv_exists: # Ensure missing_value is cast to same dtype as data's encoding["missing_value"] = dtype.type(mv) fill_value = pop_to(encoding, attrs, "missing_value", name=name) if not pd.isnull(fill_value) and not fv_exists: - data = duck_array_ops.fillna(data, fill_value) + if is_date: + data = duck_array_ops.where( + data != np.iinfo(np.int64).min, data, fill_value + ) + else: + data = duck_array_ops.fillna(data, fill_value) return Variable(dims, data, attrs, encoding, fastpath=True) @@ -275,7 +288,11 @@ def decode(self, variable: Variable, name: T_Name = None): stacklevel=3, ) - dtype, decoded_fill_value = dtypes.maybe_promote(data.dtype) + # special case DateTime to properly handle NaT + if "since" in str(attrs.get("units", "")): + dtype, decoded_fill_value = np.int64, np.iinfo(np.int64).min + else: + dtype, decoded_fill_value = dtypes.maybe_promote(data.dtype) if encoded_fill_values: transform = partial( From 82b74ecbb4db704bdbc2bc3af54dd51f3d65a665 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Mon, 4 Sep 2023 07:49:10 +0200 Subject: [PATCH 02/17] Apply suggestions from code review Co-authored-by: Spencer Clark --- xarray/coding/variables.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index c5dcfc7d9c5..3c9bf6eda7c 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -244,7 +244,7 @@ def encode(self, variable: Variable, name: T_Name = 
None): encoding["_FillValue"] = dtype.type(fv) fill_value = pop_to(encoding, attrs, "_FillValue", name=name) if not pd.isnull(fill_value): - if is_date: + if is_date and data.dtype.kind in "iu": data = duck_array_ops.where( data != np.iinfo(np.int64).min, data, fill_value ) @@ -256,7 +256,7 @@ def encode(self, variable: Variable, name: T_Name = None): encoding["missing_value"] = dtype.type(mv) fill_value = pop_to(encoding, attrs, "missing_value", name=name) if not pd.isnull(fill_value) and not fv_exists: - if is_date: + if is_date and data.dtype.kind in "iu": data = duck_array_ops.where( data != np.iinfo(np.int64).min, data, fill_value ) @@ -289,7 +289,7 @@ def decode(self, variable: Variable, name: T_Name = None): ) # special case DateTime to properly handle NaT - if "since" in str(attrs.get("units", "")): + if "since" in str(attrs.get("units", "")) and data.dtype.kind in "iu": dtype, decoded_fill_value = np.int64, np.iinfo(np.int64).min else: dtype, decoded_fill_value = dtypes.maybe_promote(data.dtype) From 8da55ac2396a1b736ad60727eb870d266607875c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Mon, 4 Sep 2023 07:50:43 +0200 Subject: [PATCH 03/17] use emit_user_level_warning --- xarray/coding/times.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index af98f219b78..cb6a34c4ddf 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -25,6 +25,7 @@ from xarray.core.formatting import first_n_items, format_timestamp, last_item from xarray.core.pdcompat import nanosecond_precision_timestamp from xarray.core.pycompat import is_duck_dask_array +from xarray.core.utils import emit_user_level_warning from xarray.core.variable import Variable try: @@ -685,7 +686,7 @@ def encode_cf_datetime( 1, _netcdf_to_numpy_timeunit(needed_delta) ).astype("timedelta64[ns]") if needed_delta != delta and time_delta > needed_time_delta: - warnings.warn( + 
emit_user_level_warning( f"Times can't be serialized faithfully with requested units {units!r}. " f"Resolution of {needed_delta!r} needed. " f"Serializing timeseries to floating point." @@ -721,8 +722,18 @@ def encode_cf_timedelta(timedeltas, units: str | None = None) -> tuple[np.ndarra units = infer_timedelta_units(timedeltas) np_unit = _netcdf_to_numpy_timeunit(units) - num = 1.0 * timedeltas / np.timedelta64(1, np_unit) - num = np.where(pd.isnull(timedeltas), np.nan, num) + + time_delta = np.timedelta64(1, np_unit).astype("timedelta64[ns]") + time_deltas = pd.TimedeltaIndex(timedeltas.ravel()) + + if np.all(time_deltas.dropna() % time_delta == np.timedelta64(0, "ns")): + # calculate int64 floor division + num = time_deltas // time_delta.astype(np.int64) + num = num.astype(np.int64, copy=False) + else: + num = time_deltas / time_delta + num = num.values.reshape(timedeltas.shape) + num = cast_to_int_if_safe(num) return (num, units) From 96f60fcee6461c7a088a3b9cd63b3673526f4108 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Mon, 4 Sep 2023 09:49:36 +0200 Subject: [PATCH 04/17] move time alignment for nc3 to encode_nc3_variable --- xarray/backends/netcdf3.py | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/xarray/backends/netcdf3.py b/xarray/backends/netcdf3.py index fd741ce935e..2196c197d8c 100644 --- a/xarray/backends/netcdf3.py +++ b/xarray/backends/netcdf3.py @@ -62,13 +62,6 @@ def coerce_nc3_dtype(arr): dtype = str(arr.dtype) if dtype in _nc3_dtype_coercions: new_dtype = _nc3_dtype_coercions[dtype] - # check if this looks like a time with NaT - # and transform to float64 - if np.issubdtype(dtype, np.int64): - mask = arr == np.iinfo(np.int64).min - if mask.any(): - arr = np.where(mask, np.nan, arr) - return arr # TODO: raise a warning whenever casting the data-type instead? 
cast_arr = arr.astype(new_dtype) if not (cast_arr == arr).all(): @@ -95,13 +88,40 @@ def encode_nc3_attrs(attrs): return {k: encode_nc3_attr_value(v) for k, v in attrs.items()} +def _maybe_prepare_times(var): + # checks for integer-based time-like and + # replaces np.iinfo(np.int64).min with _FillValue or np.nan + # this keeps backwards compatibility + + # should we import this from coding.times here? + time_strings = [ + "days", + "hours", + "minutes", + "seconds", + "milliseconds", + "microseconds", + "since", + ] + data = var.data + if data.dtype.kind in "iu": + units = var.attrs.get("units", None) + if units is not None: + if any(tstr in units for tstr in time_strings): + mask = data == np.iinfo(np.int64).min + if mask.any(): + data = np.where(mask, var.attrs.get("_FillValue", np.nan), data) + return data + + def encode_nc3_variable(var): for coder in [ coding.strings.EncodedStringCoder(allows_unicode=False), coding.strings.CharacterArrayCoder(), ]: var = coder.encode(var) - data = coerce_nc3_dtype(var.data) + data = _maybe_prepare_times(var) + data = coerce_nc3_dtype(data) attrs = encode_nc3_attrs(var.attrs) return Variable(var.dims, data, attrs, var.encoding) From 4f6440a965874c206bf7a3ec1a574f18f3b20bc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Mon, 4 Sep 2023 10:17:36 +0200 Subject: [PATCH 05/17] fix test for encode_cf_timedelta --- xarray/tests/test_coding_times.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 580de878fe6..ab8c7ce7d89 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -576,10 +576,10 @@ def test_infer_cftime_datetime_units(calendar, date_args, expected) -> None: ("1ms", "milliseconds", np.int64(1)), ("1us", "microseconds", np.int64(1)), ("1ns", "nanoseconds", np.int64(1)), - (["NaT", "0s", "1s"], None, [np.nan, 0, 1]), + (["NaT", "0s", "1s"], None, [np.iinfo(np.int64).min, 
0, 1]), (["30m", "60m"], "hours", [0.5, 1.0]), - ("NaT", "days", np.nan), - (["NaT", "NaT"], "days", [np.nan, np.nan]), + ("NaT", "days", np.iinfo(np.int64).min), + (["NaT", "NaT"], "days", [np.iinfo(np.int64).min, np.iinfo(np.int64).min]), ], ) def test_cf_timedelta(timedeltas, units, numbers) -> None: From f8e961f733979a4f13e870f07d200ff704f16270 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Mon, 4 Sep 2023 15:40:13 +0200 Subject: [PATCH 06/17] fix CFMaskCoder for time-like (also allow timedelta64), add first tests --- xarray/coding/variables.py | 23 ++++++++++--- xarray/tests/test_coding_times.py | 56 +++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 4 deletions(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 3c9bf6eda7c..5e655565098 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -215,6 +215,21 @@ def _apply_mask( return np.where(condition, decoded_fill_value, data) +def _is_time_like(units): + # test for time-like + time_strings = [ + "since", + "days", + "hours", + "minutes", + "seconds", + "milliseconds", + "microseconds", + "nanoseconds", + ] + return any(tstr in str(units) for tstr in time_strings) + + class CFMaskCoder(VariableCoder): """Mask or unmask fill values according to CF conventions.""" @@ -237,14 +252,14 @@ def encode(self, variable: Variable, name: T_Name = None): ) # special case DateTime to properly handle NaT - is_date = "since" in attrs.get("units", "") + is_time_like = _is_time_like(attrs.get("units")) if fv_exists: # Ensure _FillValue is cast to same dtype as data's encoding["_FillValue"] = dtype.type(fv) fill_value = pop_to(encoding, attrs, "_FillValue", name=name) if not pd.isnull(fill_value): - if is_date and data.dtype.kind in "iu": + if is_time_like and data.dtype.kind in "iu": data = duck_array_ops.where( data != np.iinfo(np.int64).min, data, fill_value ) @@ -256,7 +271,7 @@ def encode(self, variable: Variable, name: T_Name = None): 
encoding["missing_value"] = dtype.type(mv) fill_value = pop_to(encoding, attrs, "missing_value", name=name) if not pd.isnull(fill_value) and not fv_exists: - if is_date and data.dtype.kind in "iu": + if is_time_like and data.dtype.kind in "iu": data = duck_array_ops.where( data != np.iinfo(np.int64).min, data, fill_value ) @@ -289,7 +304,7 @@ def decode(self, variable: Variable, name: T_Name = None): ) # special case DateTime to properly handle NaT - if "since" in str(attrs.get("units", "")) and data.dtype.kind in "iu": + if _is_time_like(attrs.get("units")) and data.dtype.kind in "iu": dtype, decoded_fill_value = np.int64, np.iinfo(np.int64).min else: dtype, decoded_fill_value = dtypes.maybe_promote(data.dtype) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index ab8c7ce7d89..36402e37701 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -1191,3 +1191,59 @@ def test_contains_cftime_lazy() -> None: ) array = FirstElementAccessibleArray(times) assert _contains_cftime_datetimes(array) + + +@pytest.mark.parametrize( + "time, dtype, fill_value", + [ + ( + np.datetime64("1677-09-21T00:12:43.145224193", "ns"), + np.int64, + 20, + ), + ( + np.datetime64("1970-09-21T00:12:44.145224808", "ns"), + np.float64, + 1e30, + ), + ( + np.datetime64("1677-09-21T00:12:43.145225216", "ns"), + np.float64, + -9.223372036854776e18, + ), + ], +) +def test_roundtrip_datetime64_nanosecond_precision( + time: np.datetime64, dtype: np.typing.DTypeLike, fill_value: int | float +) -> None: + # test for GH7817 + times = [np.datetime64("1970-01-01", "ns"), np.datetime64("NaT"), time] + encoding = dict(dtype=dtype, _FillValue=fill_value) + var = Variable(["time"], times, encoding=encoding) + + encoded_var = conventions.encode_cf_variable(var) + decoded_var = conventions.decode_cf_variable("foo", encoded_var) + assert_identical(var, decoded_var) + + +@pytest.mark.parametrize( + "dtype, fill_value", + [(np.int64, 20), 
(np.int64, np.iinfo(np.int64).min), (np.float64, 1e30)], +) +def test_roundtrip_timedelta64_nanosecond_precision( + dtype: np.typing.DTypeLike, fill_value: int | float +) -> None: + # test for GH7942 + one_day = np.timedelta64(1, "ns") + nat = np.timedelta64("nat", "ns") + timedelta_values = (np.arange(5) * one_day).astype("timedelta64[ns]") + timedelta_values[2] = nat + timedelta_values[4] = nat + + encoding = dict(dtype=dtype, _FillValue=fill_value) + var = Variable(["time"], timedelta_values, encoding=encoding) + + encoded_var = conventions.encode_cf_variable(var) + decoded_var = conventions.decode_cf_variable("foo", encoded_var) + + assert_identical(var, decoded_var) From d020bbc003d6c27ed1c2ec3a1376666f36b36625 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Sun, 10 Sep 2023 12:04:10 +0200 Subject: [PATCH 07/17] rename to _unpack_time_units_and_ref_date as suggested in review --- xarray/coding/times.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 5ec869f9671..3618dc2ac7d 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -172,7 +172,7 @@ def _unpack_netcdf_time_units(units: str) -> tuple[str, str]: return delta_units, ref_date -def _unpack_delta_ref_date(units): +def _unpack_time_units_and_ref_date(units): # same us _unpack_netcdf_time_units but finalizes ref_date for # processing in encode_cf_datetime delta, _ref_date = _unpack_netcdf_time_units(units) @@ -670,13 +670,13 @@ def encode_cf_datetime( raise OutOfBoundsDatetime assert dates.dtype == "datetime64[ns]" - delta, ref_date = _unpack_delta_ref_date(units) + delta, ref_date = _unpack_time_units_and_ref_date(units) delta_units = _netcdf_to_numpy_timeunit(delta) time_delta = np.timedelta64(1, delta_units).astype("timedelta64[ns]") # check if times can be represented with given units if data_units != units: - data_delta, data_ref_date = _unpack_delta_ref_date(data_units) + data_delta, 
data_ref_date = _unpack_time_units_and_ref_date(data_units) needed_delta = _infer_time_units_from_diff( (data_ref_date - ref_date).to_timedelta64() ) From a8c605711d7687866712c1e9215131572a8dc6b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Sun, 10 Sep 2023 14:27:32 +0200 Subject: [PATCH 08/17] refactor delta -> time_units as suggested in review --- xarray/coding/times.py | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 3618dc2ac7d..817e86a25fa 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -172,10 +172,10 @@ def _unpack_netcdf_time_units(units: str) -> tuple[str, str]: return delta_units, ref_date -def _unpack_time_units_and_ref_date(units): +def _unpack_time_units_and_ref_date(units: str) -> tuple[str, pd.Timestamp]: # same us _unpack_netcdf_time_units but finalizes ref_date for # processing in encode_cf_datetime - delta, _ref_date = _unpack_netcdf_time_units(units) + time_units, _ref_date = _unpack_netcdf_time_units(units) # TODO: the strict enforcement of nanosecond precision Timestamps can be # relaxed when addressing GitHub issue #7493. ref_date = nanosecond_precision_timestamp(_ref_date) @@ -183,7 +183,7 @@ def _unpack_time_units_and_ref_date(units): # make it timezone-naive (GH 2649). if ref_date.tz is not None: ref_date = ref_date.tz_convert(None) - return delta, ref_date + return time_units, ref_date def _decode_cf_datetime_dtype( @@ -237,8 +237,8 @@ def _decode_datetime_with_pandas( "pandas." ) - delta, ref_date = _unpack_netcdf_time_units(units) - delta = _netcdf_to_numpy_timeunit(delta) + time_units, ref_date = _unpack_netcdf_time_units(units) + time_units = _netcdf_to_numpy_timeunit(time_units) try: # TODO: the strict enforcement of nanosecond precision Timestamps can be # relaxed when addressing GitHub issue #7493. 
@@ -252,8 +252,8 @@ def _decode_datetime_with_pandas( warnings.filterwarnings("ignore", "invalid value encountered", RuntimeWarning) if flat_num_dates.size > 0: # avoid size 0 datetimes GH1329 - pd.to_timedelta(flat_num_dates.min(), delta) + ref_date - pd.to_timedelta(flat_num_dates.max(), delta) + ref_date + pd.to_timedelta(flat_num_dates.min(), time_units) + ref_date + pd.to_timedelta(flat_num_dates.max(), time_units) + ref_date # To avoid integer overflow when converting to nanosecond units for integer # dtypes smaller than np.int64 cast all integer and unsigned integer dtype @@ -268,7 +268,7 @@ def _decode_datetime_with_pandas( # works much faster when dealing with integers (GH 1399). # properly handle NaN/NaT to prevent casting NaN to int nan = np.isnan(flat_num_dates) | (flat_num_dates == np.iinfo(np.int64).min) - flat_num_dates = flat_num_dates * _NS_PER_TIME_DELTA[delta] + flat_num_dates = flat_num_dates * _NS_PER_TIME_DELTA[time_units] flat_num_dates_ns_int = np.zeros_like(flat_num_dates, dtype=np.int64) flat_num_dates_ns_int[nan] = np.iinfo(np.int64).min flat_num_dates_ns_int[~nan] = flat_num_dates[~nan].astype(np.int64) @@ -590,12 +590,12 @@ def _should_cftime_be_used( def _cleanup_netcdf_time_units(units: str) -> str: - delta, ref_date = _unpack_netcdf_time_units(units) - delta = delta.lower() - if not delta.endswith("s"): - delta = f"{delta}s" + time_units, ref_date = _unpack_netcdf_time_units(units) + time_units = time_units.lower() + if not time_units.endswith("s"): + time_units = f"{time_units}s" try: - units = f"{delta} since {format_timestamp(ref_date)}" + units = f"{time_units} since {format_timestamp(ref_date)}" except (OutOfBoundsDatetime, ValueError): # don't worry about reifying the units if they're out of bounds or # formatted badly @@ -670,23 +670,23 @@ def encode_cf_datetime( raise OutOfBoundsDatetime assert dates.dtype == "datetime64[ns]" - delta, ref_date = _unpack_time_units_and_ref_date(units) - delta_units = 
_netcdf_to_numpy_timeunit(delta) - time_delta = np.timedelta64(1, delta_units).astype("timedelta64[ns]") + time_units, ref_date = _unpack_time_units_and_ref_date(units) + time_units = _netcdf_to_numpy_timeunit(time_units) + time_delta = np.timedelta64(1, time_units).astype("timedelta64[ns]") # check if times can be represented with given units if data_units != units: - data_delta, data_ref_date = _unpack_time_units_and_ref_date(data_units) - needed_delta = _infer_time_units_from_diff( + _, data_ref_date = _unpack_time_units_and_ref_date(data_units) + needed_units = _infer_time_units_from_diff( (data_ref_date - ref_date).to_timedelta64() ) needed_time_delta = np.timedelta64( - 1, _netcdf_to_numpy_timeunit(needed_delta) + 1, _netcdf_to_numpy_timeunit(needed_units) ).astype("timedelta64[ns]") - if needed_delta != delta and time_delta > needed_time_delta: + if needed_units != time_units and time_delta > needed_time_delta: emit_user_level_warning( f"Times can't be serialized faithfully with requested units {units!r}. " - f"Resolution of {needed_delta!r} needed. " + f"Resolution of {needed_units!r} needed. " f"Serializing timeseries to floating point." 
) From 9b96ff7e9568ec49beddfd79f0f32e4dd80cb38e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Sun, 10 Sep 2023 21:42:29 +0200 Subject: [PATCH 09/17] refactor out function _time_units_to_timedelta64, reorder flow and remove unneeded checks, apply filterwarnings, adapt tests --- xarray/coding/times.py | 70 ++++++++++++++++++------------- xarray/tests/test_coding_times.py | 49 +++++++++++++++++++++- 2 files changed, 89 insertions(+), 30 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 817e86a25fa..32316992126 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -382,6 +382,10 @@ def _infer_time_units_from_diff(unique_timedeltas) -> str: return "seconds" +def _time_units_to_timedelta64(units: str) -> np.timedelta64: + return np.timedelta64(1, _netcdf_to_numpy_timeunit(units)).astype("timedelta64[ns]") + + def infer_calendar_name(dates) -> CFCalendar: """Given an array of datetimes, infer the CF calendar name""" if is_np_datetime_like(dates.dtype): @@ -671,24 +675,15 @@ def encode_cf_datetime( assert dates.dtype == "datetime64[ns]" time_units, ref_date = _unpack_time_units_and_ref_date(units) - time_units = _netcdf_to_numpy_timeunit(time_units) - time_delta = np.timedelta64(1, time_units).astype("timedelta64[ns]") + time_delta = _time_units_to_timedelta64(time_units) - # check if times can be represented with given units + # retrieve needed units to faithfully encode to int64 + needed_units, data_ref_date = _unpack_time_units_and_ref_date(data_units) if data_units != units: - _, data_ref_date = _unpack_time_units_and_ref_date(data_units) - needed_units = _infer_time_units_from_diff( - (data_ref_date - ref_date).to_timedelta64() - ) - needed_time_delta = np.timedelta64( - 1, _netcdf_to_numpy_timeunit(needed_units) - ).astype("timedelta64[ns]") - if needed_units != time_units and time_delta > needed_time_delta: - emit_user_level_warning( - f"Times can't be serialized faithfully with requested units 
{units!r}. " - f"Resolution of {needed_units!r} needed. " - f"Serializing timeseries to floating point." - ) + # this accounts for differences in the reference times + ref_delta = abs(data_ref_date - ref_date).to_timedelta64() + if ref_delta > np.timedelta64(0, "ns"): + needed_units = _infer_time_units_from_diff(ref_delta) # Wrap the dates in a DatetimeIndex to do the subtraction to ensure # an OverflowError is raised if the ref_date is too far away from @@ -696,43 +691,60 @@ def encode_cf_datetime( dates_as_index = pd.DatetimeIndex(dates.ravel()) time_deltas = dates_as_index - ref_date - # Use floor division if time_delta evenly divides all differences - # to preserve integer dtype if possible (GH 4045). - # NaT prevents us from using datetime64 directly, but we can safely coerce - # to int64 in presence of NaT, so we just dropna before check (GH 7817). - if np.all(time_deltas.dropna() % time_delta == np.timedelta64(0, "ns")): + # needed time delta to encode faithfully to int64 + needed_time_delta = _time_units_to_timedelta64(needed_units) + if time_delta <= needed_time_delta: # calculate int64 floor division + # to preserve integer dtype if possible (GH 4045, GH7817). num = time_deltas // time_delta.astype(np.int64) num = num.astype(np.int64, copy=False) else: + emit_user_level_warning( + f"Times can't be serialized faithfully with requested units {units!r}. " + f"Resolution of {needed_units!r} needed. " + f"Serializing timeseries to floating point." 
+ ) num = time_deltas / time_delta num = num.values.reshape(dates.shape) except (OutOfBoundsDatetime, OverflowError, ValueError): num = _encode_datetime_with_cftime(dates, units, calendar) + # do it now only for cftime-based flow + # we already covered for this in pandas-based flow + num = cast_to_int_if_safe(num) - num = cast_to_int_if_safe(num) return (num, units, calendar) def encode_cf_timedelta(timedeltas, units: str | None = None) -> tuple[np.ndarray, str]: - if units is None: - units = infer_timedelta_units(timedeltas) + data_units = infer_timedelta_units(timedeltas) - np_unit = _netcdf_to_numpy_timeunit(units) + if units is None: + units = data_units - time_delta = np.timedelta64(1, np_unit).astype("timedelta64[ns]") + time_delta = _time_units_to_timedelta64(units) time_deltas = pd.TimedeltaIndex(timedeltas.ravel()) - if np.all(time_deltas.dropna() % time_delta == np.timedelta64(0, "ns")): + # retrieve needed units to faithfully encode to int64 + needed_units = data_units + if data_units != units: + needed_units = _infer_time_units_from_diff(np.unique(time_deltas.dropna())) + + # needed time delta to encode faithfully to int64 + needed_time_delta = _time_units_to_timedelta64(needed_units) + if time_delta <= needed_time_delta: # calculate int64 floor division + # to preserve integer dtype if possible num = time_deltas // time_delta.astype(np.int64) num = num.astype(np.int64, copy=False) else: + emit_user_level_warning( + f"Timedeltas can't be serialized faithfully with requested units {units!r}. " + f"Resolution of {needed_units!r} needed. " + f"Serializing timedeltas to floating point." 
+ ) num = time_deltas / time_delta num = num.values.reshape(timedeltas.shape) - - num = cast_to_int_if_safe(num) return (num, units) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 36402e37701..e71e680f8e3 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -29,7 +29,7 @@ from xarray.coding.variables import SerializationWarning from xarray.conventions import _update_bounds_attributes, cf_encoder from xarray.core.common import contains_cftime_datetimes -from xarray.testing import assert_equal, assert_identical +from xarray.testing import assert_allclose, assert_equal, assert_identical from xarray.tests import ( FirstElementAccessibleArray, arm_xfail, @@ -110,6 +110,7 @@ def _all_cftime_date_types(): @requires_cftime @pytest.mark.filterwarnings("ignore:Ambiguous reference date string") +@pytest.mark.filterwarnings("ignore:Times can't be serialized faithfully") @pytest.mark.parametrize(["num_dates", "units", "calendar"], _CF_DATETIME_TESTS) def test_cf_datetime(num_dates, units, calendar) -> None: import cftime @@ -567,6 +568,7 @@ def test_infer_cftime_datetime_units(calendar, date_args, expected) -> None: assert expected == coding.times.infer_datetime_units(dates) +@pytest.mark.filterwarnings("ignore:Timedeltas can't be serialized faithfully") @pytest.mark.parametrize( ["timedeltas", "units", "numbers"], [ @@ -1020,6 +1022,7 @@ def test_decode_ambiguous_time_warns(calendar) -> None: np.testing.assert_array_equal(result, expected) +@pytest.mark.filterwarnings("ignore:Times can't be serialized faithfully") @pytest.mark.parametrize("encoding_units", FREQUENCIES_TO_ENCODING_UNITS.values()) @pytest.mark.parametrize("freq", FREQUENCIES_TO_ENCODING_UNITS.keys()) @pytest.mark.parametrize("date_range", [pd.date_range, cftime_range]) @@ -1226,6 +1229,28 @@ def test_roundtrip_datetime64_nanosecond_precision( assert_identical(var, decoded_var) +def 
test_roundtrip_datetime64_nanosecond_precision_warning() -> None: + # test warning if times can't be serialized faithfully + times = [ + np.datetime64("1970-01-01T00:01:00", "ns"), + np.datetime64("NaT"), + np.datetime64("1970-01-02T00:01:00", "ns"), + ] + units = "days since 1970-01-10T01:01:00" + needed_units = "hours" + encoding = dict(_FillValue=20, units=units) + var = Variable(["time"], times, encoding=encoding) + wmsg = ( + f"Times can't be serialized faithfully with requested units {units!r}. " + f"Resolution of {needed_units!r} needed. " + ) + with pytest.warns(UserWarning, match=wmsg): + encoded_var = conventions.encode_cf_variable(var) + + decoded_var = conventions.decode_cf_variable("foo", encoded_var) + assert_identical(var, decoded_var) + + @pytest.mark.parametrize( "dtype, fill_value", [(np.int64, 20), (np.int64, np.iinfo(np.int64).min), (np.float64, 1e30)], @@ -1247,3 +1272,25 @@ def test_roundtrip_timedelta64_nanosecond_precision( decoded_var = conventions.decode_cf_variable("foo", encoded_var) assert_identical(var, decoded_var) + + +def test_roundtrip_timedelta64_nanosecond_precision_warning() -> None: + # test warning if timedeltas can't be serialized faithfully + one_day = np.timedelta64(1, "D") + nat = np.timedelta64("nat", "ns") + timedelta_values = (np.arange(5) * one_day).astype("timedelta64[ns]") + timedelta_values[2] = nat + timedelta_values[4] = np.timedelta64(12, "h").astype("timedelta64[ns]") + + units = "days" + needed_units = "hours" + wmsg = ( + f"Timedeltas can't be serialized faithfully with requested units {units!r}. " + f"Resolution of {needed_units!r} needed. 
" + ) + encoding = dict(_FillValue=20, units=units) + var = Variable(["time"], timedelta_values, encoding=encoding) + with pytest.warns(UserWarning, match=wmsg): + encoded_var = conventions.encode_cf_variable(var) + decoded_var = conventions.decode_cf_variable("foo", encoded_var) + assert_allclose(var, decoded_var) From 5adb58e864a01dfa938415e23a4202df20a1f11a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Mon, 11 Sep 2023 12:46:20 +0200 Subject: [PATCH 10/17] import _is_time_like from coding.variables --- xarray/backends/netcdf3.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/xarray/backends/netcdf3.py b/xarray/backends/netcdf3.py index 2196c197d8c..db00ef1972b 100644 --- a/xarray/backends/netcdf3.py +++ b/xarray/backends/netcdf3.py @@ -93,21 +93,11 @@ def _maybe_prepare_times(var): # replaces np.iinfo(np.int64).min with _FillValue or np.nan # this keeps backwards compatibility - # should we import this from coding.times here? - time_strings = [ - "days", - "hours", - "minutes", - "seconds", - "milliseconds", - "microseconds", - "since", - ] data = var.data if data.dtype.kind in "iu": units = var.attrs.get("units", None) if units is not None: - if any(tstr in units for tstr in time_strings): + if coding.variables._is_time_like(units): mask = data == np.iinfo(np.int64).min if mask.any(): data = np.where(mask, var.attrs.get("_FillValue", np.nan), data) From 87fbb1ad4977bfbfe3136e48ba70a000ef4cf7e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Mon, 11 Sep 2023 15:18:05 +0200 Subject: [PATCH 11/17] adapt tests, add _numpy_to_netcdf_timeunit-conversion function --- xarray/coding/times.py | 12 +++++++ xarray/tests/test_coding_times.py | 53 ++++++++++++++++++++++--------- 2 files changed, 50 insertions(+), 15 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 32316992126..79efbecfb7c 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -123,6 
+123,18 @@ def _netcdf_to_numpy_timeunit(units: str) -> str: }[units] +def _numpy_to_netcdf_timeunit(units: str) -> str: + return { + "ns": "nanoseconds", + "us": "microseconds", + "ms": "milliseconds", + "s": "seconds", + "m": "minutes", + "h": "hours", + "D": "days", + }[units] + + def _ensure_padded_year(ref_date: str) -> str: # Reference dates without a padded year (e.g. since 1-1-1 or since 2-3-4) # are ambiguous (is it YMD or DMY?). This can lead to some very odd diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index e71e680f8e3..268b4f0835b 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -20,6 +20,7 @@ ) from xarray.coding.times import ( _encode_datetime_with_cftime, + _numpy_to_netcdf_timeunit, _should_cftime_be_used, cftime_to_nptime, decode_cf_datetime, @@ -1197,35 +1198,57 @@ def test_contains_cftime_lazy() -> None: @pytest.mark.parametrize( - "time, dtype, fill_value", + "timestr, timeunit, dtype, fill_value, use_encoding", [ + ("1677-09-21T00:12:43.145224193", "ns", np.int64, 20, True), + ("1970-09-21T00:12:44.145224808", "ns", np.float64, 1e30, True), ( - np.datetime64("1677-09-21T00:12:43.145224193", "ns"), - np.int64, - 20, - ), - ( - np.datetime64("1970-09-21T00:12:44.145224808", "ns"), - np.float64, - 1e30, - ), - ( - np.datetime64("1677-09-21T00:12:43.145225216", "ns"), + "1677-09-21T00:12:43.145225216", + "ns", np.float64, -9.223372036854776e18, + True, ), + ("1677-09-21T00:12:43.145224193", "ns", np.int64, None, False), + ("1677-09-21T00:12:43.145225", "us", np.int64, None, False), + ("1970-01-01T00:00:01.000001", "us", np.int64, None, False), ], ) def test_roundtrip_datetime64_nanosecond_precision( - time: np.datetime64, dtype: np.typing.DTypeLike, fill_value: int | float + timestr: str, + timeunit: str, + dtype: np.typing.DTypeLike, + fill_value: int | float | None, + use_encoding: bool, ) -> None: # test for GH7817 - times = [np.datetime64("1970-01-01", "ns"), 
np.datetime64("NaT"), time] - encoding = dict(dtype=dtype, _FillValue=fill_value) + time = np.datetime64(timestr, timeunit) + times = [np.datetime64("1970-01-01", timeunit), np.datetime64("NaT"), time] + + if use_encoding: + encoding = dict(dtype=dtype, _FillValue=fill_value) + else: + encoding = {} + var = Variable(["time"], times, encoding=encoding) + assert var.dtype == np.dtype(" Date: Mon, 11 Sep 2023 15:46:48 +0200 Subject: [PATCH 12/17] adapt tests, add _numpy_to_netcdf_timeunit-conversion function --- xarray/tests/test_coding_times.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 268b4f0835b..6be0c0ef06b 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -1223,7 +1223,7 @@ def test_roundtrip_datetime64_nanosecond_precision( ) -> None: # test for GH7817 time = np.datetime64(timestr, timeunit) - times = [np.datetime64("1970-01-01", timeunit), np.datetime64("NaT"), time] + times = [np.datetime64("1970-01-01T00:00:00", timeunit), np.datetime64("NaT"), time] if use_encoding: encoding = dict(dtype=dtype, _FillValue=fill_value) @@ -1317,3 +1317,20 @@ def test_roundtrip_timedelta64_nanosecond_precision_warning() -> None: encoded_var = conventions.encode_cf_variable(var) decoded_var = conventions.decode_cf_variable("foo", encoded_var) assert_allclose(var, decoded_var) + + +def test_roundtrip_float_times() -> None: + fill_value = 20.0 + t0 = "2000-01-01 12:00:00" + times = [np.datetime64(t0, "ns"), np.datetime64("NaT", "ns")] + + var = Variable( + ["time"], times, encoding=dict(dtype=np.float64, _FillValue=fill_value) + ) + + encoded_var = conventions.encode_cf_variable(var) + decoded_var = conventions.decode_cf_variable("foo", encoded_var) + + assert_identical(var, decoded_var) + assert decoded_var.encoding["units"] == f"days since {t0}" + assert decoded_var.encoding["_FillValue"] == fill_value From 
d538ea9355bbd1c7aefa9444b235e8a18be62663 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 13 Sep 2023 08:05:26 +0200 Subject: [PATCH 13/17] adapt test as per review, remove arm_xfail for backend test --- xarray/tests/test_backends.py | 2 -- xarray/tests/test_coding_times.py | 15 ++++++++++----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 4799b619efd..e9d7a768b86 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -54,7 +54,6 @@ from xarray.core.options import set_options from xarray.core.pycompat import array_type from xarray.tests import ( - arm_xfail, assert_allclose, assert_array_equal, assert_equal, @@ -526,7 +525,6 @@ def test_roundtrip_string_encoded_characters(self) -> None: assert_identical(expected, actual) assert actual["x"].encoding["_Encoding"] == "ascii" - @arm_xfail def test_roundtrip_numpy_datetime_data(self) -> None: times = pd.to_datetime(["2000-01-01", "2000-01-02", "NaT"]) expected = Dataset({"t": ("t", times), "t0": times[0]}) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 6be0c0ef06b..079e432b565 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -1321,16 +1321,21 @@ def test_roundtrip_timedelta64_nanosecond_precision_warning() -> None: def test_roundtrip_float_times() -> None: fill_value = 20.0 - t0 = "2000-01-01 12:00:00" - times = [np.datetime64(t0, "ns"), np.datetime64("NaT", "ns")] + times = [np.datetime64("2000-01-01 12:00:00", "ns"), np.datetime64("NaT", "ns")] + units = "days since 2000-01-01" var = Variable( - ["time"], times, encoding=dict(dtype=np.float64, _FillValue=fill_value) + ["time"], + times, + encoding=dict(dtype=np.float64, _FillValue=fill_value, units=units), ) encoded_var = conventions.encode_cf_variable(var) - decoded_var = conventions.decode_cf_variable("foo", encoded_var) + 
np.testing.assert_array_equal(encoded_var, np.array([0.5, 20.0])) + assert encoded_var.attrs["units"] == units + assert encoded_var.attrs["_FillValue"] == fill_value + decoded_var = conventions.decode_cf_variable("foo", encoded_var) assert_identical(var, decoded_var) - assert decoded_var.encoding["units"] == f"days since {t0}" + assert decoded_var.encoding["units"] == units assert decoded_var.encoding["_FillValue"] == fill_value From a75e4b86f71aac7f67d08f8535c8b141a4e6d8f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 13 Sep 2023 08:27:57 +0200 Subject: [PATCH 14/17] add whats-new.rst entry --- doc/whats-new.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index e71c7df49d0..a6f1343438b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -63,6 +63,10 @@ Bug fixes - Fix bug where :py:class:`DataArray` instances on the right-hand side of :py:meth:`DataArray.__setitem__` lose dimension names. (:issue:`7030`, :pull:`8067`) By `Darsh Ranjan <https://github.com/dranjan>`_. +- Fixed a bug where casting from ``float`` to ``int64`` (undefined for ``NaN``) lead to varying + issues (:issue:`7817`, :issue:`7942`, :issue:`7790`, :issue:`6191`, :issue:`7096`, + :issue:`1064`, :pull:`7827`). + By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_. Documentation ~~~~~~~~~~~~~ @@ -73,6 +77,8 @@ Internal Changes - Many error messages related to invalid dimensions or coordinates now always show the list of valid dims/coords (:pull:`8079`). By `András Gunyhó <https://github.com/mgunyho>`_. +- Refactor of encoding and decoding times/timedeltas to preserve nanosecond resolution (:pull:`7827`). + By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_. ..
_whats-new.2023.08.0: From d4a71cd595867d8c2cfc08e3e81f91a848d45670 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Thu, 14 Sep 2023 05:38:43 +0200 Subject: [PATCH 15/17] Update doc/whats-new.rst Co-authored-by: Spencer Clark --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index a6f1343438b..d1b6a3b97bd 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -77,7 +77,7 @@ Internal Changes - Many error messages related to invalid dimensions or coordinates now always show the list of valid dims/coords (:pull:`8079`). By `András Gunyhó <https://github.com/mgunyho>`_. -- Refactor of encoding and decoding times/timedeltas to preserve nanosecond resolution (:pull:`7827`). +- Refactor of encoding and decoding times/timedeltas to preserve nanosecond resolution in arrays that contain missing values (:pull:`7827`). By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_. .. _whats-new.2023.08.0: From 4dca66e250ed0ba4399153ad02bcdc2b0b6f22c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Thu, 14 Sep 2023 05:38:51 +0200 Subject: [PATCH 16/17] Update doc/whats-new.rst Co-authored-by: Spencer Clark --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index d1b6a3b97bd..612424eaa0e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -63,7 +63,7 @@ Bug fixes - Fix bug where :py:class:`DataArray` instances on the right-hand side of :py:meth:`DataArray.__setitem__` lose dimension names. (:issue:`7030`, :pull:`8067`) By `Darsh Ranjan <https://github.com/dranjan>`_. -- Fixed a bug where casting from ``float`` to ``int64`` (undefined for ``NaN``) lead to varying +- Fixed a bug where casting from ``float`` to ``int64`` (undefined for ``NaN``) led to varying issues (:issue:`7817`, :issue:`7942`, :issue:`7790`, :issue:`6191`, :issue:`7096`, :issue:`1064`, :pull:`7827`). By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
From ebb00b8416f9dcb0c4d7449cf2f7e4edefa8a70d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Sat, 16 Sep 2023 13:48:25 +0200 Subject: [PATCH 17/17] fix whats-new.rst --- doc/whats-new.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 5785572f1d8..75cba8f25e4 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -66,13 +66,14 @@ Bug fixes - Fix bug where :py:class:`DataArray` instances on the right-hand side of :py:meth:`DataArray.__setitem__` lose dimension names. (:issue:`7030`, :pull:`8067`) By `Darsh Ranjan <https://github.com/dranjan>`_. -- Fixed a bug where casting from ``float`` to ``int64`` (undefined for ``NaN``) led to varying - issues (:issue:`7817`, :issue:`7942`, :issue:`7790`, :issue:`6191`, :issue:`7096`, - :issue:`1064`, :pull:`7827`). - Return ``float64`` in presence of ``NaT`` in :py:class:`~core.accessor_dt.DatetimeAccessor` and special case ``NaT`` handling in :py:meth:`~core.accessor_dt.DatetimeAccessor.isocalendar()` (:issue:`7928`, :pull:`8084`). By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_. +- Fixed a bug where casting from ``float`` to ``int64`` (undefined for ``NaN``) led to varying + issues (:issue:`7817`, :issue:`7942`, :issue:`7790`, :issue:`6191`, :issue:`7096`, + :issue:`1064`, :pull:`7827`). + By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_. Documentation ~~~~~~~~~~~~~