Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

override units for datetime64/timedelta64 variables to preserve integer dtype #8201

Merged
merged 22 commits into from
Sep 24, 2023
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
37db2b0
remove `dtype` from encoding for datetime64/timedelta64 variables to …
kmuehlbauer Sep 18, 2023
490b186
Merge branch 'main' into drop-time-dtype
kmuehlbauer Sep 19, 2023
aff4e63
adapt tests
kmuehlbauer Sep 19, 2023
f6a954c
add whats-new.rst entry
kmuehlbauer Sep 19, 2023
e8de234
Update xarray/coding/times.py
kmuehlbauer Sep 19, 2023
b19a0e2
Update doc/whats-new.rst
kmuehlbauer Sep 19, 2023
7c19267
Merge branch 'main' into drop-time-dtype
kmuehlbauer Sep 19, 2023
fc63ed9
add test per review suggestion, replace .kind-check with np.issubdtyp…
kmuehlbauer Sep 19, 2023
35b77d6
align timedelta64 check with datetime64 check
kmuehlbauer Sep 20, 2023
ebd2e9a
Merge branch 'main' into drop-time-dtype
kmuehlbauer Sep 20, 2023
6691bfa
Merge branch 'main' into drop-time-dtype
kmuehlbauer Sep 21, 2023
64e2087
override units instead of dtype
kmuehlbauer Sep 22, 2023
f3ca8bf
remove print statement
kmuehlbauer Sep 22, 2023
90d4e0c
warn in case of serialization to floating point, too
kmuehlbauer Sep 22, 2023
4340884
align if-else
kmuehlbauer Sep 22, 2023
a7ba341
Add instructions to warnings
kmuehlbauer Sep 24, 2023
9e25c3a
Merge branch 'main' into drop-time-dtype
kmuehlbauer Sep 24, 2023
f4b3153
Fix test
kmuehlbauer Sep 24, 2023
32461c0
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 24, 2023
410b9de
Merge branch 'main' into drop-time-dtype
kmuehlbauer Sep 24, 2023
43664c9
use warnings.catch_warnings
kmuehlbauer Sep 24, 2023
33a3e1a
Update doc/whats-new.rst
kmuehlbauer Sep 24, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ Bug fixes
- ``.rolling_exp`` functions no longer mistakenly lose non-dimensioned coords
(:issue:`6528`, :pull:`8114`)
By `Maximilian Roos <https://github.com/max-sixty>`_.
- Override the user-provided encoding dtype for datetime64/timedelta64 values if it is an integer dtype, but the units encoding would require floating point values for most faithful serialization to disk (:issue:`1064`, :pull:`8201`).
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.

Documentation
~~~~~~~~~~~~~
Expand Down
13 changes: 13 additions & 0 deletions xarray/coding/times.py
Original file line number Diff line number Diff line change
Expand Up @@ -777,6 +777,14 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable:
safe_setitem(attrs, "units", units, name=name)
safe_setitem(attrs, "calendar", calendar, name=name)

# drop dtype in encoding if encoded as float64
# to prevent unnecessary casts, see GH #1064
encoding_dtype = encoding.get("dtype", data.dtype)
if np.issubdtype(data.dtype, np.float64) and np.issubdtype(
encoding_dtype, np.integer
):
encoding.pop("dtype", None)

return Variable(dims, data, attrs, encoding, fastpath=True)
else:
return variable
Expand Down Expand Up @@ -810,6 +818,11 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable:
data, units = encode_cf_timedelta(data, encoding.pop("units", None))
safe_setitem(attrs, "units", units, name=name)

# drop dtype in encoding if encoded as float64
# to prevent unnecessary casts, see GH #1064
if np.issubdtype(data.dtype, np.float64):
kmuehlbauer marked this conversation as resolved.
Show resolved Hide resolved
encoding.pop("dtype", None)

return Variable(dims, data, attrs, encoding, fastpath=True)
else:
return variable
Expand Down
16 changes: 12 additions & 4 deletions xarray/tests/test_coding_times.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
from xarray.coding.variables import SerializationWarning
from xarray.conventions import _update_bounds_attributes, cf_encoder
from xarray.core.common import contains_cftime_datetimes
from xarray.testing import assert_allclose, assert_equal, assert_identical
from xarray.testing import assert_equal, assert_identical
from xarray.tests import (
FirstElementAccessibleArray,
arm_xfail,
Expand Down Expand Up @@ -1212,6 +1212,7 @@ def test_contains_cftime_lazy() -> None:
("1677-09-21T00:12:43.145224193", "ns", np.int64, None, False),
("1677-09-21T00:12:43.145225", "us", np.int64, None, False),
("1970-01-01T00:00:01.000001", "us", np.int64, None, False),
("1677-09-21T00:21:52.901038080", "ns", np.float32, 20.0, True),
],
)
def test_roundtrip_datetime64_nanosecond_precision(
Expand Down Expand Up @@ -1261,14 +1262,17 @@ def test_roundtrip_datetime64_nanosecond_precision_warning() -> None:
]
units = "days since 1970-01-10T01:01:00"
needed_units = "hours"
encoding = dict(_FillValue=20, units=units)
encoding = dict(dtype=np.int64, _FillValue=20, units=units)
var = Variable(["time"], times, encoding=encoding)
wmsg = (
f"Times can't be serialized faithfully with requested units {units!r}. "
f"Resolution of {needed_units!r} needed. "
)
with pytest.warns(UserWarning, match=wmsg):
encoded_var = conventions.encode_cf_variable(var)
assert encoded_var.dtype == np.float64
assert encoded_var.attrs["units"] == units
assert encoded_var.attrs["_FillValue"] == 20.0

decoded_var = conventions.decode_cf_variable("foo", encoded_var)
assert_identical(var, decoded_var)
Expand Down Expand Up @@ -1311,12 +1315,16 @@ def test_roundtrip_timedelta64_nanosecond_precision_warning() -> None:
f"Timedeltas can't be serialized faithfully with requested units {units!r}. "
f"Resolution of {needed_units!r} needed. "
)
encoding = dict(_FillValue=20, units=units)
encoding = dict(dtype=np.int64, _FillValue=20, units=units)
kmuehlbauer marked this conversation as resolved.
Show resolved Hide resolved
var = Variable(["time"], timedelta_values, encoding=encoding)
with pytest.warns(UserWarning, match=wmsg):
encoded_var = conventions.encode_cf_variable(var)
assert encoded_var.dtype == np.float64
assert encoded_var.attrs["units"] == units
assert encoded_var.attrs["_FillValue"] == 20.0
decoded_var = conventions.decode_cf_variable("foo", encoded_var)
assert_allclose(var, decoded_var)
assert_identical(var, decoded_var)
assert decoded_var.encoding["dtype"] == np.float64


def test_roundtrip_float_times() -> None:
Expand Down
Loading