From db94bb9d5f80ae613f7c33cddca89a30e393aa57 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= <kai.muehlbauer@uni-bonn.de>
Date: Thu, 31 Aug 2023 12:45:30 +0200
Subject: [PATCH 01/17] preserve nanosecond resolution when encoding/decoding
 times.

---
 xarray/backends/netcdf3.py |  7 ++++
 xarray/coding/times.py     | 70 ++++++++++++++++++++++++++++----------
 xarray/coding/variables.py | 23 +++++++++++--
 3 files changed, 79 insertions(+), 21 deletions(-)

diff --git a/xarray/backends/netcdf3.py b/xarray/backends/netcdf3.py
index ef389eefc90..fd741ce935e 100644
--- a/xarray/backends/netcdf3.py
+++ b/xarray/backends/netcdf3.py
@@ -62,6 +62,13 @@ def coerce_nc3_dtype(arr):
     dtype = str(arr.dtype)
     if dtype in _nc3_dtype_coercions:
         new_dtype = _nc3_dtype_coercions[dtype]
+        # check if this looks like a time with NaT
+        # and transform to float64
+        if np.issubdtype(dtype, np.int64):
+            mask = arr == np.iinfo(np.int64).min
+            if mask.any():
+                arr = np.where(mask, np.nan, arr)
+                return arr
         # TODO: raise a warning whenever casting the data-type instead?
         cast_arr = arr.astype(new_dtype)
         if not (cast_arr == arr).all():
diff --git a/xarray/coding/times.py b/xarray/coding/times.py
index 3745d61acc0..af98f219b78 100644
--- a/xarray/coding/times.py
+++ b/xarray/coding/times.py
@@ -171,6 +171,20 @@ def _unpack_netcdf_time_units(units: str) -> tuple[str, str]:
     return delta_units, ref_date
 
 
+def _unpack_delta_ref_date(units):
+    # same us _unpack_netcdf_time_units but finalizes ref_date for
+    # processing in encode_cf_datetime
+    delta, _ref_date = _unpack_netcdf_time_units(units)
+    # TODO: the strict enforcement of nanosecond precision Timestamps can be
+    # relaxed when addressing GitHub issue #7493.
+    ref_date = nanosecond_precision_timestamp(_ref_date)
+    # If the ref_date Timestamp is timezone-aware, convert to UTC and
+    # make it timezone-naive (GH 2649).
+    if ref_date.tz is not None:
+        ref_date = ref_date.tz_convert(None)
+    return delta, ref_date
+
+
 def _decode_cf_datetime_dtype(
     data, units: str, calendar: str, use_cftime: bool | None
 ) -> np.dtype:
@@ -251,9 +265,12 @@ def _decode_datetime_with_pandas(
 
     # Cast input ordinals to integers of nanoseconds because pd.to_timedelta
     # works much faster when dealing with integers (GH 1399).
-    flat_num_dates_ns_int = (flat_num_dates * _NS_PER_TIME_DELTA[delta]).astype(
-        np.int64
-    )
+    # properly handle NaN/NaT to prevent casting NaN to int
+    nan = np.isnan(flat_num_dates) | (flat_num_dates == np.iinfo(np.int64).min)
+    flat_num_dates = flat_num_dates * _NS_PER_TIME_DELTA[delta]
+    flat_num_dates_ns_int = np.zeros_like(flat_num_dates, dtype=np.int64)
+    flat_num_dates_ns_int[nan] = np.iinfo(np.int64).min
+    flat_num_dates_ns_int[~nan] = flat_num_dates[~nan].astype(np.int64)
 
     # Use pd.to_timedelta to safely cast integer values to timedeltas,
     # and add those to a Timestamp to safely produce a DatetimeIndex.  This
@@ -575,6 +592,9 @@ def _should_cftime_be_used(
 
 def _cleanup_netcdf_time_units(units: str) -> str:
     delta, ref_date = _unpack_netcdf_time_units(units)
+    delta = delta.lower()
+    if not delta.endswith("s"):
+        delta = f"{delta}s"
     try:
         units = f"{delta} since {format_timestamp(ref_date)}"
     except (OutOfBoundsDatetime, ValueError):
@@ -635,32 +655,41 @@ def encode_cf_datetime(
     """
     dates = np.asarray(dates)
 
+    data_units = infer_datetime_units(dates)
+
     if units is None:
-        units = infer_datetime_units(dates)
+        units = data_units
     else:
         units = _cleanup_netcdf_time_units(units)
 
     if calendar is None:
         calendar = infer_calendar_name(dates)
 
-    delta, _ref_date = _unpack_netcdf_time_units(units)
     try:
         if not _is_standard_calendar(calendar) or dates.dtype.kind == "O":
             # parse with cftime instead
             raise OutOfBoundsDatetime
         assert dates.dtype == "datetime64[ns]"
 
+        delta, ref_date = _unpack_delta_ref_date(units)
         delta_units = _netcdf_to_numpy_timeunit(delta)
         time_delta = np.timedelta64(1, delta_units).astype("timedelta64[ns]")
 
-        # TODO: the strict enforcement of nanosecond precision Timestamps can be
-        # relaxed when addressing GitHub issue #7493.
-        ref_date = nanosecond_precision_timestamp(_ref_date)
-
-        # If the ref_date Timestamp is timezone-aware, convert to UTC and
-        # make it timezone-naive (GH 2649).
-        if ref_date.tz is not None:
-            ref_date = ref_date.tz_convert(None)
+        # check if times can be represented with given units
+        if data_units != units:
+            data_delta, data_ref_date = _unpack_delta_ref_date(data_units)
+            needed_delta = _infer_time_units_from_diff(
+                (data_ref_date - ref_date).to_timedelta64()
+            )
+            needed_time_delta = np.timedelta64(
+                1, _netcdf_to_numpy_timeunit(needed_delta)
+            ).astype("timedelta64[ns]")
+            if needed_delta != delta and time_delta > needed_time_delta:
+                warnings.warn(
+                    f"Times can't be serialized faithfully with requested units {units!r}. "
+                    f"Resolution of {needed_delta!r} needed. "
+                    f"Serializing timeseries to floating point."
+                )
 
         # Wrap the dates in a DatetimeIndex to do the subtraction to ensure
         # an OverflowError is raised if the ref_date is too far away from
@@ -670,8 +699,12 @@ def encode_cf_datetime(
 
         # Use floor division if time_delta evenly divides all differences
         # to preserve integer dtype if possible (GH 4045).
-        if np.all(time_deltas % time_delta == np.timedelta64(0, "ns")):
-            num = time_deltas // time_delta
+        # NaT prevents us from using datetime64 directly, but we can safely coerce
+        # to int64 in presence of NaT, so we just dropna before check (GH 7817).
+        if np.all(time_deltas.dropna() % time_delta == np.timedelta64(0, "ns")):
+            # calculate int64 floor division
+            num = time_deltas // time_delta.astype(np.int64)
+            num = num.astype(np.int64, copy=False)
         else:
             num = time_deltas / time_delta
         num = num.values.reshape(dates.shape)
@@ -704,9 +737,10 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable:
         ) or contains_cftime_datetimes(variable):
             dims, data, attrs, encoding = unpack_for_encoding(variable)
 
-            (data, units, calendar) = encode_cf_datetime(
-                data, encoding.pop("units", None), encoding.pop("calendar", None)
-            )
+            units = encoding.pop("units", None)
+            calendar = encoding.pop("calendar", None)
+            (data, units, calendar) = encode_cf_datetime(data, units, calendar)
+
             safe_setitem(attrs, "units", units, name=name)
             safe_setitem(attrs, "calendar", calendar, name=name)
 
diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py
index 8ba7dcbb0e2..c5dcfc7d9c5 100644
--- a/xarray/coding/variables.py
+++ b/xarray/coding/variables.py
@@ -236,19 +236,32 @@ def encode(self, variable: Variable, name: T_Name = None):
                 f"Variable {name!r} has conflicting _FillValue ({fv}) and missing_value ({mv}). Cannot encode data."
             )
 
+        # special case DateTime to properly handle NaT
+        is_date = "since" in attrs.get("units", "")
+
         if fv_exists:
             # Ensure _FillValue is cast to same dtype as data's
             encoding["_FillValue"] = dtype.type(fv)
             fill_value = pop_to(encoding, attrs, "_FillValue", name=name)
             if not pd.isnull(fill_value):
-                data = duck_array_ops.fillna(data, fill_value)
+                if is_date:
+                    data = duck_array_ops.where(
+                        data != np.iinfo(np.int64).min, data, fill_value
+                    )
+                else:
+                    data = duck_array_ops.fillna(data, fill_value)
 
         if mv_exists:
             # Ensure missing_value is cast to same dtype as data's
             encoding["missing_value"] = dtype.type(mv)
             fill_value = pop_to(encoding, attrs, "missing_value", name=name)
             if not pd.isnull(fill_value) and not fv_exists:
-                data = duck_array_ops.fillna(data, fill_value)
+                if is_date:
+                    data = duck_array_ops.where(
+                        data != np.iinfo(np.int64).min, data, fill_value
+                    )
+                else:
+                    data = duck_array_ops.fillna(data, fill_value)
 
         return Variable(dims, data, attrs, encoding, fastpath=True)
 
@@ -275,7 +288,11 @@ def decode(self, variable: Variable, name: T_Name = None):
                     stacklevel=3,
                 )
 
-            dtype, decoded_fill_value = dtypes.maybe_promote(data.dtype)
+            # special case DateTime to properly handle NaT
+            if "since" in str(attrs.get("units", "")):
+                dtype, decoded_fill_value = np.int64, np.iinfo(np.int64).min
+            else:
+                dtype, decoded_fill_value = dtypes.maybe_promote(data.dtype)
 
             if encoded_fill_values:
                 transform = partial(

From 82b74ecbb4db704bdbc2bc3af54dd51f3d65a665 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= <kmuehlbauer@wradlib.org>
Date: Mon, 4 Sep 2023 07:49:10 +0200
Subject: [PATCH 02/17] Apply suggestions from code review

Co-authored-by: Spencer Clark <spencerkclark@gmail.com>
---
 xarray/coding/variables.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py
index c5dcfc7d9c5..3c9bf6eda7c 100644
--- a/xarray/coding/variables.py
+++ b/xarray/coding/variables.py
@@ -244,7 +244,7 @@ def encode(self, variable: Variable, name: T_Name = None):
             encoding["_FillValue"] = dtype.type(fv)
             fill_value = pop_to(encoding, attrs, "_FillValue", name=name)
             if not pd.isnull(fill_value):
-                if is_date:
+                if is_date and data.dtype.kind in "iu":
                     data = duck_array_ops.where(
                         data != np.iinfo(np.int64).min, data, fill_value
                     )
@@ -256,7 +256,7 @@ def encode(self, variable: Variable, name: T_Name = None):
             encoding["missing_value"] = dtype.type(mv)
             fill_value = pop_to(encoding, attrs, "missing_value", name=name)
             if not pd.isnull(fill_value) and not fv_exists:
-                if is_date:
+                if is_date and data.dtype.kind in "iu":
                     data = duck_array_ops.where(
                         data != np.iinfo(np.int64).min, data, fill_value
                     )
@@ -289,7 +289,7 @@ def decode(self, variable: Variable, name: T_Name = None):
                 )
 
             # special case DateTime to properly handle NaT
-            if "since" in str(attrs.get("units", "")):
+            if "since" in str(attrs.get("units", "")) and data.dtype.kind in "iu":
                 dtype, decoded_fill_value = np.int64, np.iinfo(np.int64).min
             else:
                 dtype, decoded_fill_value = dtypes.maybe_promote(data.dtype)

From 8da55ac2396a1b736ad60727eb870d266607875c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= <kai.muehlbauer@uni-bonn.de>
Date: Mon, 4 Sep 2023 07:50:43 +0200
Subject: [PATCH 03/17] use emit_user_level_warning

---
 xarray/coding/times.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/xarray/coding/times.py b/xarray/coding/times.py
index af98f219b78..cb6a34c4ddf 100644
--- a/xarray/coding/times.py
+++ b/xarray/coding/times.py
@@ -25,6 +25,7 @@
 from xarray.core.formatting import first_n_items, format_timestamp, last_item
 from xarray.core.pdcompat import nanosecond_precision_timestamp
 from xarray.core.pycompat import is_duck_dask_array
+from xarray.core.utils import emit_user_level_warning
 from xarray.core.variable import Variable
 
 try:
@@ -685,7 +686,7 @@ def encode_cf_datetime(
                 1, _netcdf_to_numpy_timeunit(needed_delta)
             ).astype("timedelta64[ns]")
             if needed_delta != delta and time_delta > needed_time_delta:
-                warnings.warn(
+                emit_user_level_warning(
                     f"Times can't be serialized faithfully with requested units {units!r}. "
                     f"Resolution of {needed_delta!r} needed. "
                     f"Serializing timeseries to floating point."
@@ -721,8 +722,18 @@ def encode_cf_timedelta(timedeltas, units: str | None = None) -> tuple[np.ndarra
         units = infer_timedelta_units(timedeltas)
 
     np_unit = _netcdf_to_numpy_timeunit(units)
-    num = 1.0 * timedeltas / np.timedelta64(1, np_unit)
-    num = np.where(pd.isnull(timedeltas), np.nan, num)
+
+    time_delta = np.timedelta64(1, np_unit).astype("timedelta64[ns]")
+    time_deltas = pd.TimedeltaIndex(timedeltas.ravel())
+
+    if np.all(time_deltas.dropna() % time_delta == np.timedelta64(0, "ns")):
+        # calculate int64 floor division
+        num = time_deltas // time_delta.astype(np.int64)
+        num = num.astype(np.int64, copy=False)
+    else:
+        num = time_deltas / time_delta
+    num = num.values.reshape(timedeltas.shape)
+
     num = cast_to_int_if_safe(num)
     return (num, units)
 

From 96f60fcee6461c7a088a3b9cd63b3673526f4108 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= <kai.muehlbauer@uni-bonn.de>
Date: Mon, 4 Sep 2023 09:49:36 +0200
Subject: [PATCH 04/17] move time alignment for nc3 to encode_nc3_variable

---
 xarray/backends/netcdf3.py | 36 ++++++++++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 8 deletions(-)

diff --git a/xarray/backends/netcdf3.py b/xarray/backends/netcdf3.py
index fd741ce935e..2196c197d8c 100644
--- a/xarray/backends/netcdf3.py
+++ b/xarray/backends/netcdf3.py
@@ -62,13 +62,6 @@ def coerce_nc3_dtype(arr):
     dtype = str(arr.dtype)
     if dtype in _nc3_dtype_coercions:
         new_dtype = _nc3_dtype_coercions[dtype]
-        # check if this looks like a time with NaT
-        # and transform to float64
-        if np.issubdtype(dtype, np.int64):
-            mask = arr == np.iinfo(np.int64).min
-            if mask.any():
-                arr = np.where(mask, np.nan, arr)
-                return arr
         # TODO: raise a warning whenever casting the data-type instead?
         cast_arr = arr.astype(new_dtype)
         if not (cast_arr == arr).all():
@@ -95,13 +88,40 @@ def encode_nc3_attrs(attrs):
     return {k: encode_nc3_attr_value(v) for k, v in attrs.items()}
 
 
+def _maybe_prepare_times(var):
+    # checks for integer-based time-like and
+    # replaces np.iinfo(np.int64).min with _FillValue or np.nan
+    # this keeps backwards compatibility
+
+    # should we import this from coding.times here?
+    time_strings = [
+        "days",
+        "hours",
+        "minutes",
+        "seconds",
+        "milliseconds",
+        "microseconds",
+        "since",
+    ]
+    data = var.data
+    if data.dtype.kind in "iu":
+        units = var.attrs.get("units", None)
+        if units is not None:
+            if any(tstr in units for tstr in time_strings):
+                mask = data == np.iinfo(np.int64).min
+                if mask.any():
+                    data = np.where(mask, var.attrs.get("_FillValue", np.nan), data)
+    return data
+
+
 def encode_nc3_variable(var):
     for coder in [
         coding.strings.EncodedStringCoder(allows_unicode=False),
         coding.strings.CharacterArrayCoder(),
     ]:
         var = coder.encode(var)
-    data = coerce_nc3_dtype(var.data)
+    data = _maybe_prepare_times(var)
+    data = coerce_nc3_dtype(data)
     attrs = encode_nc3_attrs(var.attrs)
     return Variable(var.dims, data, attrs, var.encoding)
 

From 4f6440a965874c206bf7a3ec1a574f18f3b20bc2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= <kai.muehlbauer@uni-bonn.de>
Date: Mon, 4 Sep 2023 10:17:36 +0200
Subject: [PATCH 05/17] fix test for encode_cf_timedelta

---
 xarray/tests/test_coding_times.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
index 580de878fe6..ab8c7ce7d89 100644
--- a/xarray/tests/test_coding_times.py
+++ b/xarray/tests/test_coding_times.py
@@ -576,10 +576,10 @@ def test_infer_cftime_datetime_units(calendar, date_args, expected) -> None:
         ("1ms", "milliseconds", np.int64(1)),
         ("1us", "microseconds", np.int64(1)),
         ("1ns", "nanoseconds", np.int64(1)),
-        (["NaT", "0s", "1s"], None, [np.nan, 0, 1]),
+        (["NaT", "0s", "1s"], None, [np.iinfo(np.int64).min, 0, 1]),
         (["30m", "60m"], "hours", [0.5, 1.0]),
-        ("NaT", "days", np.nan),
-        (["NaT", "NaT"], "days", [np.nan, np.nan]),
+        ("NaT", "days", np.iinfo(np.int64).min),
+        (["NaT", "NaT"], "days", [np.iinfo(np.int64).min, np.iinfo(np.int64).min]),
     ],
 )
 def test_cf_timedelta(timedeltas, units, numbers) -> None:

From f8e961f733979a4f13e870f07d200ff704f16270 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= <kai.muehlbauer@uni-bonn.de>
Date: Mon, 4 Sep 2023 15:40:13 +0200
Subject: [PATCH 06/17] fix CFMaskCoder for time-like (also allow timedelta64),
 add first tests

---
 xarray/coding/variables.py        | 23 ++++++++++---
 xarray/tests/test_coding_times.py | 56 +++++++++++++++++++++++++++++++
 2 files changed, 75 insertions(+), 4 deletions(-)

diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py
index 3c9bf6eda7c..5e655565098 100644
--- a/xarray/coding/variables.py
+++ b/xarray/coding/variables.py
@@ -215,6 +215,21 @@ def _apply_mask(
     return np.where(condition, decoded_fill_value, data)
 
 
+def _is_time_like(units):
+    # test for time-like
+    time_strings = [
+        "since",
+        "days",
+        "hours",
+        "minutes",
+        "seconds",
+        "milliseconds",
+        "microseconds",
+        "nanoseconds",
+    ]
+    return any(tstr in str(units) for tstr in time_strings)
+
+
 class CFMaskCoder(VariableCoder):
     """Mask or unmask fill values according to CF conventions."""
 
@@ -237,14 +252,14 @@ def encode(self, variable: Variable, name: T_Name = None):
             )
 
         # special case DateTime to properly handle NaT
-        is_date = "since" in attrs.get("units", "")
+        is_time_like = _is_time_like(attrs.get("units"))
 
         if fv_exists:
             # Ensure _FillValue is cast to same dtype as data's
             encoding["_FillValue"] = dtype.type(fv)
             fill_value = pop_to(encoding, attrs, "_FillValue", name=name)
             if not pd.isnull(fill_value):
-                if is_date and data.dtype.kind in "iu":
+                if is_time_like and data.dtype.kind in "iu":
                     data = duck_array_ops.where(
                         data != np.iinfo(np.int64).min, data, fill_value
                     )
@@ -256,7 +271,7 @@ def encode(self, variable: Variable, name: T_Name = None):
             encoding["missing_value"] = dtype.type(mv)
             fill_value = pop_to(encoding, attrs, "missing_value", name=name)
             if not pd.isnull(fill_value) and not fv_exists:
-                if is_date and data.dtype.kind in "iu":
+                if is_time_like and data.dtype.kind in "iu":
                     data = duck_array_ops.where(
                         data != np.iinfo(np.int64).min, data, fill_value
                     )
@@ -289,7 +304,7 @@ def decode(self, variable: Variable, name: T_Name = None):
                 )
 
             # special case DateTime to properly handle NaT
-            if "since" in str(attrs.get("units", "")) and data.dtype.kind in "iu":
+            if _is_time_like(attrs.get("units")) and data.dtype.kind in "iu":
                 dtype, decoded_fill_value = np.int64, np.iinfo(np.int64).min
             else:
                 dtype, decoded_fill_value = dtypes.maybe_promote(data.dtype)
diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
index ab8c7ce7d89..36402e37701 100644
--- a/xarray/tests/test_coding_times.py
+++ b/xarray/tests/test_coding_times.py
@@ -1191,3 +1191,59 @@ def test_contains_cftime_lazy() -> None:
     )
     array = FirstElementAccessibleArray(times)
     assert _contains_cftime_datetimes(array)
+
+
+@pytest.mark.parametrize(
+    "time, dtype, fill_value",
+    [
+        (
+            np.datetime64("1677-09-21T00:12:43.145224193", "ns"),
+            np.int64,
+            20,
+        ),
+        (
+            np.datetime64("1970-09-21T00:12:44.145224808", "ns"),
+            np.float64,
+            1e30,
+        ),
+        (
+            np.datetime64("1677-09-21T00:12:43.145225216", "ns"),
+            np.float64,
+            -9.223372036854776e18,
+        ),
+    ],
+)
+def test_roundtrip_datetime64_nanosecond_precision(
+    time: np.datetime64, dtype: np.typing.DTypeLike, fill_value: int | float
+) -> None:
+    # test for GH7817
+    times = [np.datetime64("1970-01-01", "ns"), np.datetime64("NaT"), time]
+    encoding = dict(dtype=dtype, _FillValue=fill_value)
+    var = Variable(["time"], times, encoding=encoding)
+
+    encoded_var = conventions.encode_cf_variable(var)
+    decoded_var = conventions.decode_cf_variable("foo", encoded_var)
+    assert_identical(var, decoded_var)
+
+
+@pytest.mark.parametrize(
+    "dtype, fill_value",
+    [(np.int64, 20), (np.int64, np.iinfo(np.int64).min), (np.float64, 1e30)],
+)
+def test_roundtrip_timedelta64_nanosecond_precision(
+    dtype: np.typing.DTypeLike, fill_value: int | float
+) -> None:
+    # test for GH7942
+    one_day = np.timedelta64(1, "ns")
+    nat = np.timedelta64("nat", "ns")
+    timedelta_values = (np.arange(5) * one_day).astype("timedelta64[ns]")
+    timedelta_values[2] = nat
+    timedelta_values[4] = nat
+
+    encoding = dict(dtype=dtype, _FillValue=fill_value)
+    var = Variable(["time"], timedelta_values, encoding=encoding)
+
+    encoded_var = conventions.encode_cf_variable(var)
+    decoded_var = conventions.decode_cf_variable("foo", encoded_var)
+
+    assert_identical(var, decoded_var)

From d020bbc003d6c27ed1c2ec3a1376666f36b36625 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= <kmuehlbauer@wradlib.org>
Date: Sun, 10 Sep 2023 12:04:10 +0200
Subject: [PATCH 07/17] rename to _unpack_time_units_and_ref_date as suggested
 in review

---
 xarray/coding/times.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/xarray/coding/times.py b/xarray/coding/times.py
index 5ec869f9671..3618dc2ac7d 100644
--- a/xarray/coding/times.py
+++ b/xarray/coding/times.py
@@ -172,7 +172,7 @@ def _unpack_netcdf_time_units(units: str) -> tuple[str, str]:
     return delta_units, ref_date
 
 
-def _unpack_delta_ref_date(units):
+def _unpack_time_units_and_ref_date(units):
     # same us _unpack_netcdf_time_units but finalizes ref_date for
     # processing in encode_cf_datetime
     delta, _ref_date = _unpack_netcdf_time_units(units)
@@ -670,13 +670,13 @@ def encode_cf_datetime(
             raise OutOfBoundsDatetime
         assert dates.dtype == "datetime64[ns]"
 
-        delta, ref_date = _unpack_delta_ref_date(units)
+        delta, ref_date = _unpack_time_units_and_ref_date(units)
         delta_units = _netcdf_to_numpy_timeunit(delta)
         time_delta = np.timedelta64(1, delta_units).astype("timedelta64[ns]")
 
         # check if times can be represented with given units
         if data_units != units:
-            data_delta, data_ref_date = _unpack_delta_ref_date(data_units)
+            data_delta, data_ref_date = _unpack_time_units_and_ref_date(data_units)
             needed_delta = _infer_time_units_from_diff(
                 (data_ref_date - ref_date).to_timedelta64()
             )

From a8c605711d7687866712c1e9215131572a8dc6b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= <kmuehlbauer@wradlib.org>
Date: Sun, 10 Sep 2023 14:27:32 +0200
Subject: [PATCH 08/17] refactor delta -> time_units as suggested in review

---
 xarray/coding/times.py | 42 +++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/xarray/coding/times.py b/xarray/coding/times.py
index 3618dc2ac7d..817e86a25fa 100644
--- a/xarray/coding/times.py
+++ b/xarray/coding/times.py
@@ -172,10 +172,10 @@ def _unpack_netcdf_time_units(units: str) -> tuple[str, str]:
     return delta_units, ref_date
 
 
-def _unpack_time_units_and_ref_date(units):
+def _unpack_time_units_and_ref_date(units: str) -> tuple[str, pd.Timestamp]:
     # same us _unpack_netcdf_time_units but finalizes ref_date for
     # processing in encode_cf_datetime
-    delta, _ref_date = _unpack_netcdf_time_units(units)
+    time_units, _ref_date = _unpack_netcdf_time_units(units)
     # TODO: the strict enforcement of nanosecond precision Timestamps can be
     # relaxed when addressing GitHub issue #7493.
     ref_date = nanosecond_precision_timestamp(_ref_date)
@@ -183,7 +183,7 @@ def _unpack_time_units_and_ref_date(units):
     # make it timezone-naive (GH 2649).
     if ref_date.tz is not None:
         ref_date = ref_date.tz_convert(None)
-    return delta, ref_date
+    return time_units, ref_date
 
 
 def _decode_cf_datetime_dtype(
@@ -237,8 +237,8 @@ def _decode_datetime_with_pandas(
             "pandas."
         )
 
-    delta, ref_date = _unpack_netcdf_time_units(units)
-    delta = _netcdf_to_numpy_timeunit(delta)
+    time_units, ref_date = _unpack_netcdf_time_units(units)
+    time_units = _netcdf_to_numpy_timeunit(time_units)
     try:
         # TODO: the strict enforcement of nanosecond precision Timestamps can be
         # relaxed when addressing GitHub issue #7493.
@@ -252,8 +252,8 @@ def _decode_datetime_with_pandas(
         warnings.filterwarnings("ignore", "invalid value encountered", RuntimeWarning)
         if flat_num_dates.size > 0:
             # avoid size 0 datetimes GH1329
-            pd.to_timedelta(flat_num_dates.min(), delta) + ref_date
-            pd.to_timedelta(flat_num_dates.max(), delta) + ref_date
+            pd.to_timedelta(flat_num_dates.min(), time_units) + ref_date
+            pd.to_timedelta(flat_num_dates.max(), time_units) + ref_date
 
     # To avoid integer overflow when converting to nanosecond units for integer
     # dtypes smaller than np.int64 cast all integer and unsigned integer dtype
@@ -268,7 +268,7 @@ def _decode_datetime_with_pandas(
     # works much faster when dealing with integers (GH 1399).
     # properly handle NaN/NaT to prevent casting NaN to int
     nan = np.isnan(flat_num_dates) | (flat_num_dates == np.iinfo(np.int64).min)
-    flat_num_dates = flat_num_dates * _NS_PER_TIME_DELTA[delta]
+    flat_num_dates = flat_num_dates * _NS_PER_TIME_DELTA[time_units]
     flat_num_dates_ns_int = np.zeros_like(flat_num_dates, dtype=np.int64)
     flat_num_dates_ns_int[nan] = np.iinfo(np.int64).min
     flat_num_dates_ns_int[~nan] = flat_num_dates[~nan].astype(np.int64)
@@ -590,12 +590,12 @@ def _should_cftime_be_used(
 
 
 def _cleanup_netcdf_time_units(units: str) -> str:
-    delta, ref_date = _unpack_netcdf_time_units(units)
-    delta = delta.lower()
-    if not delta.endswith("s"):
-        delta = f"{delta}s"
+    time_units, ref_date = _unpack_netcdf_time_units(units)
+    time_units = time_units.lower()
+    if not time_units.endswith("s"):
+        time_units = f"{time_units}s"
     try:
-        units = f"{delta} since {format_timestamp(ref_date)}"
+        units = f"{time_units} since {format_timestamp(ref_date)}"
     except (OutOfBoundsDatetime, ValueError):
         # don't worry about reifying the units if they're out of bounds or
         # formatted badly
@@ -670,23 +670,23 @@ def encode_cf_datetime(
             raise OutOfBoundsDatetime
         assert dates.dtype == "datetime64[ns]"
 
-        delta, ref_date = _unpack_time_units_and_ref_date(units)
-        delta_units = _netcdf_to_numpy_timeunit(delta)
-        time_delta = np.timedelta64(1, delta_units).astype("timedelta64[ns]")
+        time_units, ref_date = _unpack_time_units_and_ref_date(units)
+        time_units = _netcdf_to_numpy_timeunit(time_units)
+        time_delta = np.timedelta64(1, time_units).astype("timedelta64[ns]")
 
         # check if times can be represented with given units
         if data_units != units:
-            data_delta, data_ref_date = _unpack_time_units_and_ref_date(data_units)
-            needed_delta = _infer_time_units_from_diff(
+            _, data_ref_date = _unpack_time_units_and_ref_date(data_units)
+            needed_units = _infer_time_units_from_diff(
                 (data_ref_date - ref_date).to_timedelta64()
             )
             needed_time_delta = np.timedelta64(
-                1, _netcdf_to_numpy_timeunit(needed_delta)
+                1, _netcdf_to_numpy_timeunit(needed_units)
             ).astype("timedelta64[ns]")
-            if needed_delta != delta and time_delta > needed_time_delta:
+            if needed_units != time_units and time_delta > needed_time_delta:
                 emit_user_level_warning(
                     f"Times can't be serialized faithfully with requested units {units!r}. "
-                    f"Resolution of {needed_delta!r} needed. "
+                    f"Resolution of {needed_units!r} needed. "
                     f"Serializing timeseries to floating point."
                 )
 

From 9b96ff7e9568ec49beddfd79f0f32e4dd80cb38e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= <kmuehlbauer@wradlib.org>
Date: Sun, 10 Sep 2023 21:42:29 +0200
Subject: [PATCH 09/17] refactor out function _time_units_to_timedelta64,
 reorder flow and remove unneeded checks, apply filterwarnings, adapt tests

---
 xarray/coding/times.py            | 70 ++++++++++++++++++-------------
 xarray/tests/test_coding_times.py | 49 +++++++++++++++++++++-
 2 files changed, 89 insertions(+), 30 deletions(-)

diff --git a/xarray/coding/times.py b/xarray/coding/times.py
index 817e86a25fa..32316992126 100644
--- a/xarray/coding/times.py
+++ b/xarray/coding/times.py
@@ -382,6 +382,10 @@ def _infer_time_units_from_diff(unique_timedeltas) -> str:
     return "seconds"
 
 
+def _time_units_to_timedelta64(units: str) -> np.timedelta64:
+    return np.timedelta64(1, _netcdf_to_numpy_timeunit(units)).astype("timedelta64[ns]")
+
+
 def infer_calendar_name(dates) -> CFCalendar:
     """Given an array of datetimes, infer the CF calendar name"""
     if is_np_datetime_like(dates.dtype):
@@ -671,24 +675,15 @@ def encode_cf_datetime(
         assert dates.dtype == "datetime64[ns]"
 
         time_units, ref_date = _unpack_time_units_and_ref_date(units)
-        time_units = _netcdf_to_numpy_timeunit(time_units)
-        time_delta = np.timedelta64(1, time_units).astype("timedelta64[ns]")
+        time_delta = _time_units_to_timedelta64(time_units)
 
-        # check if times can be represented with given units
+        # retrieve needed units to faithfully encode to int64
+        needed_units, data_ref_date = _unpack_time_units_and_ref_date(data_units)
         if data_units != units:
-            _, data_ref_date = _unpack_time_units_and_ref_date(data_units)
-            needed_units = _infer_time_units_from_diff(
-                (data_ref_date - ref_date).to_timedelta64()
-            )
-            needed_time_delta = np.timedelta64(
-                1, _netcdf_to_numpy_timeunit(needed_units)
-            ).astype("timedelta64[ns]")
-            if needed_units != time_units and time_delta > needed_time_delta:
-                emit_user_level_warning(
-                    f"Times can't be serialized faithfully with requested units {units!r}. "
-                    f"Resolution of {needed_units!r} needed. "
-                    f"Serializing timeseries to floating point."
-                )
+            # this accounts for differences in the reference times
+            ref_delta = abs(data_ref_date - ref_date).to_timedelta64()
+            if ref_delta > np.timedelta64(0, "ns"):
+                needed_units = _infer_time_units_from_diff(ref_delta)
 
         # Wrap the dates in a DatetimeIndex to do the subtraction to ensure
         # an OverflowError is raised if the ref_date is too far away from
@@ -696,43 +691,60 @@ def encode_cf_datetime(
         dates_as_index = pd.DatetimeIndex(dates.ravel())
         time_deltas = dates_as_index - ref_date
 
-        # Use floor division if time_delta evenly divides all differences
-        # to preserve integer dtype if possible (GH 4045).
-        # NaT prevents us from using datetime64 directly, but we can safely coerce
-        # to int64 in presence of NaT, so we just dropna before check (GH 7817).
-        if np.all(time_deltas.dropna() % time_delta == np.timedelta64(0, "ns")):
+        # needed time delta to encode faithfully to int64
+        needed_time_delta = _time_units_to_timedelta64(needed_units)
+        if time_delta <= needed_time_delta:
             # calculate int64 floor division
+            # to preserve integer dtype if possible (GH 4045, GH 7817).
             num = time_deltas // time_delta.astype(np.int64)
             num = num.astype(np.int64, copy=False)
         else:
+            emit_user_level_warning(
+                f"Times can't be serialized faithfully with requested units {units!r}. "
+                f"Resolution of {needed_units!r} needed. "
+                f"Serializing timeseries to floating point."
+            )
             num = time_deltas / time_delta
         num = num.values.reshape(dates.shape)
 
     except (OutOfBoundsDatetime, OverflowError, ValueError):
         num = _encode_datetime_with_cftime(dates, units, calendar)
+        # cast to int only in the cftime-based flow;
+        # the pandas-based flow above already chose int64 or float itself
+        num = cast_to_int_if_safe(num)
 
-    num = cast_to_int_if_safe(num)
     return (num, units, calendar)
 
 
 def encode_cf_timedelta(timedeltas, units: str | None = None) -> tuple[np.ndarray, str]:
-    if units is None:
-        units = infer_timedelta_units(timedeltas)
+    data_units = infer_timedelta_units(timedeltas)
 
-    np_unit = _netcdf_to_numpy_timeunit(units)
+    if units is None:
+        units = data_units
 
-    time_delta = np.timedelta64(1, np_unit).astype("timedelta64[ns]")
+    time_delta = _time_units_to_timedelta64(units)
     time_deltas = pd.TimedeltaIndex(timedeltas.ravel())
 
-    if np.all(time_deltas.dropna() % time_delta == np.timedelta64(0, "ns")):
+    # retrieve needed units to faithfully encode to int64
+    needed_units = data_units
+    if data_units != units:
+        needed_units = _infer_time_units_from_diff(np.unique(time_deltas.dropna()))
+
+    # needed time delta to encode faithfully to int64
+    needed_time_delta = _time_units_to_timedelta64(needed_units)
+    if time_delta <= needed_time_delta:
         # calculate int64 floor division
+        # to preserve integer dtype if possible
         num = time_deltas // time_delta.astype(np.int64)
         num = num.astype(np.int64, copy=False)
     else:
+        emit_user_level_warning(
+            f"Timedeltas can't be serialized faithfully with requested units {units!r}. "
+            f"Resolution of {needed_units!r} needed. "
+            f"Serializing timedeltas to floating point."
+        )
         num = time_deltas / time_delta
     num = num.values.reshape(timedeltas.shape)
-
-    num = cast_to_int_if_safe(num)
     return (num, units)
 
 
diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
index 36402e37701..e71e680f8e3 100644
--- a/xarray/tests/test_coding_times.py
+++ b/xarray/tests/test_coding_times.py
@@ -29,7 +29,7 @@
 from xarray.coding.variables import SerializationWarning
 from xarray.conventions import _update_bounds_attributes, cf_encoder
 from xarray.core.common import contains_cftime_datetimes
-from xarray.testing import assert_equal, assert_identical
+from xarray.testing import assert_allclose, assert_equal, assert_identical
 from xarray.tests import (
     FirstElementAccessibleArray,
     arm_xfail,
@@ -110,6 +110,7 @@ def _all_cftime_date_types():
 
 @requires_cftime
 @pytest.mark.filterwarnings("ignore:Ambiguous reference date string")
+@pytest.mark.filterwarnings("ignore:Times can't be serialized faithfully")
 @pytest.mark.parametrize(["num_dates", "units", "calendar"], _CF_DATETIME_TESTS)
 def test_cf_datetime(num_dates, units, calendar) -> None:
     import cftime
@@ -567,6 +568,7 @@ def test_infer_cftime_datetime_units(calendar, date_args, expected) -> None:
     assert expected == coding.times.infer_datetime_units(dates)
 
 
+@pytest.mark.filterwarnings("ignore:Timedeltas can't be serialized faithfully")
 @pytest.mark.parametrize(
     ["timedeltas", "units", "numbers"],
     [
@@ -1020,6 +1022,7 @@ def test_decode_ambiguous_time_warns(calendar) -> None:
     np.testing.assert_array_equal(result, expected)
 
 
+@pytest.mark.filterwarnings("ignore:Times can't be serialized faithfully")
 @pytest.mark.parametrize("encoding_units", FREQUENCIES_TO_ENCODING_UNITS.values())
 @pytest.mark.parametrize("freq", FREQUENCIES_TO_ENCODING_UNITS.keys())
 @pytest.mark.parametrize("date_range", [pd.date_range, cftime_range])
@@ -1226,6 +1229,28 @@ def test_roundtrip_datetime64_nanosecond_precision(
     assert_identical(var, decoded_var)
 
 
+def test_roundtrip_datetime64_nanosecond_precision_warning() -> None:
+    # test warning if times can't be serialized faithfully
+    times = [
+        np.datetime64("1970-01-01T00:01:00", "ns"),
+        np.datetime64("NaT"),
+        np.datetime64("1970-01-02T00:01:00", "ns"),
+    ]
+    units = "days since 1970-01-10T01:01:00"
+    needed_units = "hours"
+    encoding = dict(_FillValue=20, units=units)
+    var = Variable(["time"], times, encoding=encoding)
+    wmsg = (
+        f"Times can't be serialized faithfully with requested units {units!r}. "
+        f"Resolution of {needed_units!r} needed. "
+    )
+    with pytest.warns(UserWarning, match=wmsg):
+        encoded_var = conventions.encode_cf_variable(var)
+
+    decoded_var = conventions.decode_cf_variable("foo", encoded_var)
+    assert_identical(var, decoded_var)
+
+
 @pytest.mark.parametrize(
     "dtype, fill_value",
     [(np.int64, 20), (np.int64, np.iinfo(np.int64).min), (np.float64, 1e30)],
@@ -1247,3 +1272,25 @@ def test_roundtrip_timedelta64_nanosecond_precision(
     decoded_var = conventions.decode_cf_variable("foo", encoded_var)
 
     assert_identical(var, decoded_var)
+
+
+def test_roundtrip_timedelta64_nanosecond_precision_warning() -> None:
+    # test warning if timedeltas can't be serialized faithfully
+    one_day = np.timedelta64(1, "D")
+    nat = np.timedelta64("nat", "ns")
+    timedelta_values = (np.arange(5) * one_day).astype("timedelta64[ns]")
+    timedelta_values[2] = nat
+    timedelta_values[4] = np.timedelta64(12, "h").astype("timedelta64[ns]")
+
+    units = "days"
+    needed_units = "hours"
+    wmsg = (
+        f"Timedeltas can't be serialized faithfully with requested units {units!r}. "
+        f"Resolution of {needed_units!r} needed. "
+    )
+    encoding = dict(_FillValue=20, units=units)
+    var = Variable(["time"], timedelta_values, encoding=encoding)
+    with pytest.warns(UserWarning, match=wmsg):
+        encoded_var = conventions.encode_cf_variable(var)
+    decoded_var = conventions.decode_cf_variable("foo", encoded_var)
+    assert_allclose(var, decoded_var)

From 5adb58e864a01dfa938415e23a4202df20a1f11a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= <kai.muehlbauer@uni-bonn.de>
Date: Mon, 11 Sep 2023 12:46:20 +0200
Subject: [PATCH 10/17] import _is_time_like from coding.variables

---
 xarray/backends/netcdf3.py | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/xarray/backends/netcdf3.py b/xarray/backends/netcdf3.py
index 2196c197d8c..db00ef1972b 100644
--- a/xarray/backends/netcdf3.py
+++ b/xarray/backends/netcdf3.py
@@ -93,21 +93,11 @@ def _maybe_prepare_times(var):
     # replaces np.iinfo(np.int64).min with _FillValue or np.nan
     # this keeps backwards compatibility
 
-    # should we import this from coding.times here?
-    time_strings = [
-        "days",
-        "hours",
-        "minutes",
-        "seconds",
-        "milliseconds",
-        "microseconds",
-        "since",
-    ]
     data = var.data
     if data.dtype.kind in "iu":
         units = var.attrs.get("units", None)
         if units is not None:
-            if any(tstr in units for tstr in time_strings):
+            if coding.variables._is_time_like(units):
                 mask = data == np.iinfo(np.int64).min
                 if mask.any():
                     data = np.where(mask, var.attrs.get("_FillValue", np.nan), data)

From 87fbb1ad4977bfbfe3136e48ba70a000ef4cf7e6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= <kai.muehlbauer@uni-bonn.de>
Date: Mon, 11 Sep 2023 15:18:05 +0200
Subject: [PATCH 11/17] adapt tests, add _numpy_to_netcdf_timeunit-conversion
 function

---
 xarray/coding/times.py            | 12 +++++++
 xarray/tests/test_coding_times.py | 53 ++++++++++++++++++++++---------
 2 files changed, 50 insertions(+), 15 deletions(-)

diff --git a/xarray/coding/times.py b/xarray/coding/times.py
index 32316992126..79efbecfb7c 100644
--- a/xarray/coding/times.py
+++ b/xarray/coding/times.py
@@ -123,6 +123,18 @@ def _netcdf_to_numpy_timeunit(units: str) -> str:
     }[units]
 
 
+def _numpy_to_netcdf_timeunit(units: str) -> str:
+    return {
+        "ns": "nanoseconds",
+        "us": "microseconds",
+        "ms": "milliseconds",
+        "s": "seconds",
+        "m": "minutes",
+        "h": "hours",
+        "D": "days",
+    }[units]
+
+
 def _ensure_padded_year(ref_date: str) -> str:
     # Reference dates without a padded year (e.g. since 1-1-1 or since 2-3-4)
     # are ambiguous (is it YMD or DMY?). This can lead to some very odd
diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
index e71e680f8e3..268b4f0835b 100644
--- a/xarray/tests/test_coding_times.py
+++ b/xarray/tests/test_coding_times.py
@@ -20,6 +20,7 @@
 )
 from xarray.coding.times import (
     _encode_datetime_with_cftime,
+    _numpy_to_netcdf_timeunit,
     _should_cftime_be_used,
     cftime_to_nptime,
     decode_cf_datetime,
@@ -1197,35 +1198,57 @@ def test_contains_cftime_lazy() -> None:
 
 
 @pytest.mark.parametrize(
-    "time, dtype, fill_value",
+    "timestr, timeunit, dtype, fill_value, use_encoding",
     [
+        ("1677-09-21T00:12:43.145224193", "ns", np.int64, 20, True),
+        ("1970-09-21T00:12:44.145224808", "ns", np.float64, 1e30, True),
         (
-            np.datetime64("1677-09-21T00:12:43.145224193", "ns"),
-            np.int64,
-            20,
-        ),
-        (
-            np.datetime64("1970-09-21T00:12:44.145224808", "ns"),
-            np.float64,
-            1e30,
-        ),
-        (
-            np.datetime64("1677-09-21T00:12:43.145225216", "ns"),
+            "1677-09-21T00:12:43.145225216",
+            "ns",
             np.float64,
             -9.223372036854776e18,
+            True,
         ),
+        ("1677-09-21T00:12:43.145224193", "ns", np.int64, None, False),
+        ("1677-09-21T00:12:43.145225", "us", np.int64, None, False),
+        ("1970-01-01T00:00:01.000001", "us", np.int64, None, False),
     ],
 )
 def test_roundtrip_datetime64_nanosecond_precision(
-    time: np.datetime64, dtype: np.typing.DTypeLike, fill_value: int | float
+    timestr: str,
+    timeunit: str,
+    dtype: np.typing.DTypeLike,
+    fill_value: int | float | None,
+    use_encoding: bool,
 ) -> None:
     # test for GH7817
-    times = [np.datetime64("1970-01-01", "ns"), np.datetime64("NaT"), time]
-    encoding = dict(dtype=dtype, _FillValue=fill_value)
+    time = np.datetime64(timestr, timeunit)
+    times = [np.datetime64("1970-01-01", timeunit), np.datetime64("NaT"), time]
+
+    if use_encoding:
+        encoding = dict(dtype=dtype, _FillValue=fill_value)
+    else:
+        encoding = {}
+
     var = Variable(["time"], times, encoding=encoding)
+    assert var.dtype == np.dtype("<M8[ns]")
 
     encoded_var = conventions.encode_cf_variable(var)
+    assert (
+        encoded_var.attrs["units"]
+        == f"{_numpy_to_netcdf_timeunit(timeunit)} since 1970-01-01 00:00:00"
+    )
+    assert encoded_var.attrs["calendar"] == "proleptic_gregorian"
+    assert encoded_var.data.dtype == dtype
+
     decoded_var = conventions.decode_cf_variable("foo", encoded_var)
+    assert decoded_var.dtype == np.dtype("<M8[ns]")
+    assert (
+        decoded_var.encoding["units"]
+        == f"{_numpy_to_netcdf_timeunit(timeunit)} since 1970-01-01 00:00:00"
+    )
+    assert decoded_var.encoding["dtype"] == dtype
+    assert decoded_var.encoding["calendar"] == "proleptic_gregorian"
     assert_identical(var, decoded_var)
 
 

From 1fbc8818489a0b7e984c9dd19310a575ecd895e2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= <kai.muehlbauer@uni-bonn.de>
Date: Mon, 11 Sep 2023 15:46:48 +0200
Subject: [PATCH 12/17] adapt tests, add _numpy_to_netcdf_timeunit-conversion
 function

---
 xarray/tests/test_coding_times.py | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
index 268b4f0835b..6be0c0ef06b 100644
--- a/xarray/tests/test_coding_times.py
+++ b/xarray/tests/test_coding_times.py
@@ -1223,7 +1223,7 @@ def test_roundtrip_datetime64_nanosecond_precision(
 ) -> None:
     # test for GH7817
     time = np.datetime64(timestr, timeunit)
-    times = [np.datetime64("1970-01-01", timeunit), np.datetime64("NaT"), time]
+    times = [np.datetime64("1970-01-01T00:00:00", timeunit), np.datetime64("NaT"), time]
 
     if use_encoding:
         encoding = dict(dtype=dtype, _FillValue=fill_value)
@@ -1317,3 +1317,20 @@ def test_roundtrip_timedelta64_nanosecond_precision_warning() -> None:
         encoded_var = conventions.encode_cf_variable(var)
     decoded_var = conventions.decode_cf_variable("foo", encoded_var)
     assert_allclose(var, decoded_var)
+
+
+def test_roundtrip_float_times() -> None:
+    fill_value = 20.0
+    t0 = "2000-01-01 12:00:00"
+    times = [np.datetime64(t0, "ns"), np.datetime64("NaT", "ns")]
+
+    var = Variable(
+        ["time"], times, encoding=dict(dtype=np.float64, _FillValue=fill_value)
+    )
+
+    encoded_var = conventions.encode_cf_variable(var)
+    decoded_var = conventions.decode_cf_variable("foo", encoded_var)
+
+    assert_identical(var, decoded_var)
+    assert decoded_var.encoding["units"] == f"days since {t0}"
+    assert decoded_var.encoding["_FillValue"] == fill_value

From d538ea9355bbd1c7aefa9444b235e8a18be62663 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= <kai.muehlbauer@uni-bonn.de>
Date: Wed, 13 Sep 2023 08:05:26 +0200
Subject: [PATCH 13/17] adapt test as per review, remove arm_xfail for backend
 test

---
 xarray/tests/test_backends.py     |  2 --
 xarray/tests/test_coding_times.py | 15 ++++++++++-----
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 4799b619efd..e9d7a768b86 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -54,7 +54,6 @@
 from xarray.core.options import set_options
 from xarray.core.pycompat import array_type
 from xarray.tests import (
-    arm_xfail,
     assert_allclose,
     assert_array_equal,
     assert_equal,
@@ -526,7 +525,6 @@ def test_roundtrip_string_encoded_characters(self) -> None:
             assert_identical(expected, actual)
             assert actual["x"].encoding["_Encoding"] == "ascii"
 
-    @arm_xfail
     def test_roundtrip_numpy_datetime_data(self) -> None:
         times = pd.to_datetime(["2000-01-01", "2000-01-02", "NaT"])
         expected = Dataset({"t": ("t", times), "t0": times[0]})
diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
index 6be0c0ef06b..079e432b565 100644
--- a/xarray/tests/test_coding_times.py
+++ b/xarray/tests/test_coding_times.py
@@ -1321,16 +1321,21 @@ def test_roundtrip_timedelta64_nanosecond_precision_warning() -> None:
 
 def test_roundtrip_float_times() -> None:
     fill_value = 20.0
-    t0 = "2000-01-01 12:00:00"
-    times = [np.datetime64(t0, "ns"), np.datetime64("NaT", "ns")]
+    times = [np.datetime64("2000-01-01 12:00:00", "ns"), np.datetime64("NaT", "ns")]
 
+    units = "days since 2000-01-01"
     var = Variable(
-        ["time"], times, encoding=dict(dtype=np.float64, _FillValue=fill_value)
+        ["time"],
+        times,
+        encoding=dict(dtype=np.float64, _FillValue=fill_value, units=units),
     )
 
     encoded_var = conventions.encode_cf_variable(var)
-    decoded_var = conventions.decode_cf_variable("foo", encoded_var)
+    np.testing.assert_array_equal(encoded_var, np.array([0.5, 20.0]))
+    assert encoded_var.attrs["units"] == units
+    assert encoded_var.attrs["_FillValue"] == fill_value
 
+    decoded_var = conventions.decode_cf_variable("foo", encoded_var)
     assert_identical(var, decoded_var)
-    assert decoded_var.encoding["units"] == f"days since {t0}"
+    assert decoded_var.encoding["units"] == units
     assert decoded_var.encoding["_FillValue"] == fill_value

From a75e4b86f71aac7f67d08f8535c8b141a4e6d8f5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= <kai.muehlbauer@uni-bonn.de>
Date: Wed, 13 Sep 2023 08:27:57 +0200
Subject: [PATCH 14/17] add whats-new.rst entry

---
 doc/whats-new.rst | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index e71c7df49d0..a6f1343438b 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -63,6 +63,10 @@ Bug fixes
 - Fix bug where :py:class:`DataArray` instances on the right-hand side
   of :py:meth:`DataArray.__setitem__` lose dimension names.
   (:issue:`7030`, :pull:`8067`) By `Darsh Ranjan <https://github.com/dranjan>`_.
+- Fixed a bug where casting from ``float`` to ``int64`` (undefined for ``NaN``) lead to varying
+  issues (:issue:`7817`, :issue:`7942`, :issue:`7790`, :issue:`6191`, :issue:`7096`,
+  :issue:`1064`, :pull:`7827`).
+  By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
 
 Documentation
 ~~~~~~~~~~~~~
@@ -73,6 +77,8 @@ Internal Changes
 
 - Many error messages related to invalid dimensions or coordinates now always show the list of valid dims/coords (:pull:`8079`).
   By `András Gunyhó <https://github.com/mgunyho>`_.
+- Refactor of encoding and decoding times/timedeltas to preserve nanosecond resolution (:pull:`7827`).
+  By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
 
 .. _whats-new.2023.08.0:
 

From d4a71cd595867d8c2cfc08e3e81f91a848d45670 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= <kmuehlbauer@wradlib.org>
Date: Thu, 14 Sep 2023 05:38:43 +0200
Subject: [PATCH 15/17] Update doc/whats-new.rst

Co-authored-by: Spencer Clark <spencerkclark@gmail.com>
---
 doc/whats-new.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index a6f1343438b..d1b6a3b97bd 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -77,7 +77,7 @@ Internal Changes
 
 - Many error messages related to invalid dimensions or coordinates now always show the list of valid dims/coords (:pull:`8079`).
   By `András Gunyhó <https://github.com/mgunyho>`_.
-- Refactor of encoding and decoding times/timedeltas to preserve nanosecond resolution (:pull:`7827`).
+- Refactor of encoding and decoding times/timedeltas to preserve nanosecond resolution in arrays that contain missing values (:pull:`7827`).
   By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
 
 .. _whats-new.2023.08.0:

From 4dca66e250ed0ba4399153ad02bcdc2b0b6f22c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= <kmuehlbauer@wradlib.org>
Date: Thu, 14 Sep 2023 05:38:51 +0200
Subject: [PATCH 16/17] Update doc/whats-new.rst

Co-authored-by: Spencer Clark <spencerkclark@gmail.com>
---
 doc/whats-new.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index d1b6a3b97bd..612424eaa0e 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -63,7 +63,7 @@ Bug fixes
 - Fix bug where :py:class:`DataArray` instances on the right-hand side
   of :py:meth:`DataArray.__setitem__` lose dimension names.
   (:issue:`7030`, :pull:`8067`) By `Darsh Ranjan <https://github.com/dranjan>`_.
-- Fixed a bug where casting from ``float`` to ``int64`` (undefined for ``NaN``) lead to varying
+- Fixed a bug where casting from ``float`` to ``int64`` (undefined for ``NaN``) led to varying
   issues (:issue:`7817`, :issue:`7942`, :issue:`7790`, :issue:`6191`, :issue:`7096`,
   :issue:`1064`, :pull:`7827`).
   By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.

From ebb00b8416f9dcb0c4d7449cf2f7e4edefa8a70d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= <kmuehlbauer@wradlib.org>
Date: Sat, 16 Sep 2023 13:48:25 +0200
Subject: [PATCH 17/17] fix whats-new.rst

---
 doc/whats-new.rst | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 5785572f1d8..75cba8f25e4 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -66,13 +66,14 @@ Bug fixes
 - Fix bug where :py:class:`DataArray` instances on the right-hand side
   of :py:meth:`DataArray.__setitem__` lose dimension names.
   (:issue:`7030`, :pull:`8067`) By `Darsh Ranjan <https://github.com/dranjan>`_.
-- Fixed a bug where casting from ``float`` to ``int64`` (undefined for ``NaN``) led to varying
-  issues (:issue:`7817`, :issue:`7942`, :issue:`7790`, :issue:`6191`, :issue:`7096`,
-  :issue:`1064`, :pull:`7827`).
 - Return ``float64`` in presence of ``NaT`` in :py:class:`~core.accessor_dt.DatetimeAccessor` and
   special case ``NaT`` handling in :py:meth:`~core.accessor_dt.DatetimeAccessor.isocalendar()`
   (:issue:`7928`, :pull:`8084`).
   By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
+- Fixed a bug where casting from ``float`` to ``int64`` (undefined for ``NaN``) led to varying
+  issues (:issue:`7817`, :issue:`7942`, :issue:`7790`, :issue:`6191`, :issue:`7096`,
+  :issue:`1064`, :pull:`7827`).
+  By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
 
 Documentation
 ~~~~~~~~~~~~~