From c54bb5440cc187b4ebbfb96f8ef9e015faa89724 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Wed, 19 Jun 2024 17:51:12 +0200 Subject: [PATCH 01/10] don't remove `netcdf4` from the upstream-dev environment --- ci/install-upstream-wheels.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/install-upstream-wheels.sh b/ci/install-upstream-wheels.sh index d728768439a..fc0b39d6f65 100755 --- a/ci/install-upstream-wheels.sh +++ b/ci/install-upstream-wheels.sh @@ -13,7 +13,7 @@ $conda remove -y numba numbagg sparse # temporarily remove numexpr $conda remove -y numexpr # temporarily remove backends -$conda remove -y cf_units hdf5 h5py netcdf4 pydap +$conda remove -y cf_units hdf5 h5py pydap # forcibly remove packages to avoid artifacts $conda remove -y --force \ numpy \ From 379d4089d7643963a81a39cd58fdcac2d53b8fd1 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Wed, 19 Jun 2024 19:26:48 +0200 Subject: [PATCH 02/10] also stop removing `h5py` and `hdf5` --- ci/install-upstream-wheels.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ci/install-upstream-wheels.sh b/ci/install-upstream-wheels.sh index fc0b39d6f65..aac37f4b945 100755 --- a/ci/install-upstream-wheels.sh +++ b/ci/install-upstream-wheels.sh @@ -13,7 +13,7 @@ $conda remove -y numba numbagg sparse # temporarily remove numexpr $conda remove -y numexpr # temporarily remove backends -$conda remove -y cf_units hdf5 h5py pydap +$conda remove -y cf_units h5py pydap # forcibly remove packages to avoid artifacts $conda remove -y --force \ numpy \ @@ -37,8 +37,7 @@ python -m pip install \ numpy \ scipy \ matplotlib \ - pandas \ - h5py + pandas # for some reason pandas depends on pyarrow already. # Remove once a `pyarrow` version compiled with `numpy>=2.0` is on `conda-forge` python -m pip install \ From 429f87d34814c011d7503669135cd05598dbd14c Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Wed, 19 Jun 2024 20:54:30 +0200 Subject: [PATCH 03/10] hard-code the precision (I believe this was missed in #9081) --- xarray/tests/test_backends.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 177700a5404..3d0f668baab 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -4285,7 +4285,7 @@ def test_roundtrip_coordinates_with_space(self) -> None: def test_roundtrip_numpy_datetime_data(self) -> None: # Override method in DatasetIOBase - remove not applicable # save_kwargs - times = pd.to_datetime(["2000-01-01", "2000-01-02", "NaT"]) + times = pd.to_datetime(["2000-01-01", "2000-01-02", "NaT"], unit="ns") expected = Dataset({"t": ("t", times), "t0": times[0]}) with self.roundtrip(expected) as actual: assert_identical(expected, actual) From 5863384301f0ec1d3b978898bc8b065aa7c2749a Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Wed, 19 Jun 2024 21:03:37 +0200 Subject: [PATCH 04/10] don't remove `h5py` either --- ci/install-upstream-wheels.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/install-upstream-wheels.sh b/ci/install-upstream-wheels.sh index aac37f4b945..79fae3c46a9 100755 --- a/ci/install-upstream-wheels.sh +++ b/ci/install-upstream-wheels.sh @@ -13,7 +13,7 @@ $conda remove -y numba numbagg sparse # temporarily remove numexpr $conda remove -y numexpr # temporarily remove backends -$conda remove -y cf_units h5py pydap +$conda remove -y cf_units pydap # forcibly remove packages to avoid artifacts $conda remove -y --force \ numpy \ From d463bc0bb1b75c2f4b1e2fa895bd18ebc60e4e21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Fri, 21 Jun 2024 13:34:09 +0200 Subject: [PATCH 05/10] use on-diks _FillValue as standrd expects, use view instead of cast to prevent OverflowError. --- xarray/coding/variables.py | 7 ++++--- xarray/tests/test_backends.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index d31cb6e626a..bc2ed5ca0c4 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -535,10 +535,11 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable: if unsigned == "true": unsigned_dtype = np.dtype(f"u{data.dtype.itemsize}") transform = partial(np.asarray, dtype=unsigned_dtype) - data = lazy_elemwise_func(data, transform, unsigned_dtype) if "_FillValue" in attrs: - new_fill = unsigned_dtype.type(attrs["_FillValue"]) - attrs["_FillValue"] = new_fill + # use view here to prevent OverflowError + new_fill = np.array(attrs["_FillValue"], dtype=data.dtype) + attrs["_FillValue"] = new_fill.view(unsigned_dtype) + data = lazy_elemwise_func(data, transform, unsigned_dtype) elif data.dtype.kind == "u": if unsigned == "false": signed_dtype = np.dtype(f"i{data.dtype.itemsize}") diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 3d0f668baab..fd917b35742 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -166,7 +166,7 @@ def create_encoded_masked_and_scaled_data(dtype: np.dtype) -> Dataset: def create_unsigned_masked_scaled_data(dtype: np.dtype) -> Dataset: encoding = { - "_FillValue": 255, + "_FillValue": -1, "_Unsigned": "true", "dtype": "i1", "add_offset": dtype.type(10), From 7e856ba0b2e48a68d60b4730a02f2259f3f60f33 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Sat, 22 Jun 2024 23:59:46 +0200 Subject: [PATCH 06/10] whats-new --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 51a2c98fb9c..f028ef140ed 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -35,6 +35,8 @@ Deprecations Bug fixes ~~~~~~~~~ +- ``numpy>=2`` compatibility in the ``netcdf4`` backend (:pull:`9136`). + By `Justus Magin `_ and `Kai Mühlbauer `_. Documentation From f63208e763fa647d6ee7831bad8ffdbd774b88f8 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 27 Jun 2024 21:55:25 +0200 Subject: [PATCH 07/10] unpin `numpy` --- ci/requirements/all-but-dask.yml | 2 +- ci/requirements/environment-windows.yml | 2 +- ci/requirements/environment.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/requirements/all-but-dask.yml b/ci/requirements/all-but-dask.yml index abf6a88690a..119db282ad9 100644 --- a/ci/requirements/all-but-dask.yml +++ b/ci/requirements/all-but-dask.yml @@ -22,7 +22,7 @@ dependencies: - netcdf4 - numba - numbagg - - numpy<2 + - numpy - packaging - pandas - pint>=0.22 diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml index 2eedc9b0621..896e390ea3e 100644 --- a/ci/requirements/environment-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -23,7 +23,7 @@ dependencies: - netcdf4 - numba - numbagg - - numpy<2 + - numpy - packaging - pandas # - pint>=0.22 diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index 317e1fe5f41..ef02a3e7f23 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -26,7 +26,7 @@ dependencies: - numba - numbagg - numexpr - - numpy<2 + - numpy - opt_einsum - packaging - pandas From a0844215c31ce27c965b82029b899984bda33778 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Fri, 28 Jun 2024 12:09:06 +0200 Subject: [PATCH 08/10] rework UnsignedCoder --- xarray/coding/variables.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index bc2ed5ca0c4..3fa1bb38bd0 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -516,10 +516,13 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: dims, data, attrs, encoding = unpack_for_encoding(variable) pop_to(encoding, attrs, "_Unsigned") - signed_dtype = np.dtype(f"i{data.dtype.itemsize}") + # we need the on-disk type here + # trying to get it from encoding, resort to data.dtype if not available + signed_dtype = np.dtype(encoding.get("dtype", f"i{data.dtype.itemsize}")) if "_FillValue" in attrs: - new_fill = signed_dtype.type(attrs["_FillValue"]) - attrs["_FillValue"] = new_fill + new_fill = np.array(attrs["_FillValue"]) + # use view here to prevent OverflowError + attrs["_FillValue"] = new_fill.view(signed_dtype).item() data = duck_array_ops.astype(duck_array_ops.around(data), signed_dtype) return Variable(dims, data, attrs, encoding, fastpath=True) @@ -536,9 +539,9 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable: unsigned_dtype = np.dtype(f"u{data.dtype.itemsize}") transform = partial(np.asarray, dtype=unsigned_dtype) if "_FillValue" in attrs: - # use view here to prevent OverflowError new_fill = np.array(attrs["_FillValue"], dtype=data.dtype) - attrs["_FillValue"] = new_fill.view(unsigned_dtype) + # use view here to prevent OverflowError + attrs["_FillValue"] = new_fill.view(unsigned_dtype).item() data = lazy_elemwise_func(data, transform, unsigned_dtype) elif data.dtype.kind == "u": if unsigned == "false": From 1cc2f8a935e4cea9f8e15b19ac30a2230fdbcaf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Fri, 28 Jun 2024 12:55:40 +0200 Subject: [PATCH 09/10] add test --- xarray/tests/test_backends.py | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index fd917b35742..f3a56e6d931 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -166,7 +166,7 @@ def create_encoded_masked_and_scaled_data(dtype: np.dtype) -> Dataset: def create_unsigned_masked_scaled_data(dtype: np.dtype) -> Dataset: encoding = { - "_FillValue": -1, + "_FillValue": np.int8(-1), "_Unsigned": "true", "dtype": "i1", "add_offset": dtype.type(10), @@ -925,6 +925,35 @@ def test_roundtrip_mask_and_scale(self, decoded_fn, encoded_fn, dtype) -> None: assert decoded.variables[k].dtype == actual.variables[k].dtype assert_allclose(decoded, actual, decode_bytes=False) + @pytest.mark.parametrize("fillvalue", [np.int8(-1), np.uint8(255)]) + def test_roundtrip_unsigned(self, fillvalue): + # regression/numpy2 test for + encoding = { + "_FillValue": fillvalue, + "_Unsigned": "true", + "dtype": "i1", + } + x = np.array([0, 1, 127, 128, 254, np.nan], dtype=np.float32) + decoded = Dataset({"x": ("t", x, {}, encoding)}) + + attributes = { + "_FillValue": fillvalue, + "_Unsigned": "true", + } + # Create unsigned data corresponding to [0, 1, 127, 128, 255] unsigned + sb = np.asarray([0, 1, 127, -128, -2, -1], dtype="i1") + encoded = Dataset({"x": ("t", sb, attributes)}) + + with self.roundtrip(decoded) as actual: + for k in decoded.variables: + assert decoded.variables[k].dtype == actual.variables[k].dtype + assert_allclose(decoded, actual, decode_bytes=False) + + with self.roundtrip(decoded, open_kwargs=dict(decode_cf=False)) as actual: + for k in encoded.variables: + assert encoded.variables[k].dtype == actual.variables[k].dtype + assert_allclose(encoded, actual, decode_bytes=False) + @staticmethod def _create_cf_dataset(): original = Dataset( From 41744243cdaee0dc983ee7b8f992cc6291cce471 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Fri, 28 Jun 2024 23:03:09 +0200 Subject: [PATCH 10/10] Update xarray/coding/variables.py Co-authored-by: Justus Magin --- xarray/coding/variables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 3fa1bb38bd0..d19f285d2b9 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -517,7 +517,7 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: pop_to(encoding, attrs, "_Unsigned") # we need the on-disk type here - # trying to get it from encoding, resort to data.dtype if not available + # trying to get it from encoding, resort to an int with the same precision as data.dtype if not available signed_dtype = np.dtype(encoding.get("dtype", f"i{data.dtype.itemsize}")) if "_FillValue" in attrs: new_fill = np.array(attrs["_FillValue"])