From 3deee7bb535dba9a48ee590c7f5119a7f2d779be Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Sun, 30 Jun 2024 18:46:30 +0200 Subject: [PATCH] properly diff objects with arrays as attributes on variables (#9169) * move the attr comparison into a common function * check that we can actually diff objects with array attrs * whats-new entry * Add property test * Add more dtypes * Better test * Fix skip * Use simple attrs strategy --------- Co-authored-by: Deepak Cherian Co-authored-by: Deepak Cherian --- doc/whats-new.rst | 2 ++ properties/test_properties.py | 17 +++++++++++++++++ xarray/core/formatting.py | 18 ++++++++++++------ xarray/testing/strategies.py | 6 +++++- xarray/tests/test_formatting.py | 30 ++++++++++++++++++++++++++++++ 5 files changed, 66 insertions(+), 7 deletions(-) create mode 100644 properties/test_properties.py diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 0174e16602f..f3ab5d46e1d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -39,6 +39,8 @@ Bug fixes ~~~~~~~~~ - Make :py:func:`testing.assert_allclose` work with numpy 2.0 (:issue:`9165`, :pull:`9166`). By `Pontus Lurcock `_. +- Allow diffing objects with array attributes on variables (:issue:`9153`, :pull:`9169`). + By `Justus Magin `_. - Promote floating-point numeric datetimes before decoding (:issue:`9179`, :pull:`9182`). By `Justus Magin `_. diff --git a/properties/test_properties.py b/properties/test_properties.py new file mode 100644 index 00000000000..fc0a1955539 --- /dev/null +++ b/properties/test_properties.py @@ -0,0 +1,17 @@ +import pytest + +pytest.importorskip("hypothesis") + +from hypothesis import given + +import xarray as xr +import xarray.testing.strategies as xrst + + +@given(attrs=xrst.simple_attrs) +def test_assert_identical(attrs): + v = xr.Variable(dims=(), data=0, attrs=attrs) + xr.testing.assert_identical(v, v.copy(deep=True)) + + ds = xr.Dataset(attrs=attrs) + xr.testing.assert_identical(ds, ds.copy(deep=True)) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index c15df34b5b1..5c4a3015843 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -765,6 +765,12 @@ def _diff_mapping_repr( a_indexes=None, b_indexes=None, ): + def compare_attr(a, b): + if is_duck_array(a) or is_duck_array(b): + return array_equiv(a, b) + else: + return a == b + def extra_items_repr(extra_keys, mapping, ab_side, kwargs): extra_repr = [ summarizer(k, mapping[k], col_width, **kwargs[k]) for k in extra_keys @@ -801,11 +807,7 @@ def extra_items_repr(extra_keys, mapping, ab_side, kwargs): is_variable = True except AttributeError: # compare attribute value - if is_duck_array(a_mapping[k]) or is_duck_array(b_mapping[k]): - compatible = array_equiv(a_mapping[k], b_mapping[k]) - else: - compatible = a_mapping[k] == b_mapping[k] - + compatible = compare_attr(a_mapping[k], b_mapping[k]) is_variable = False if not compatible: @@ -821,7 +823,11 @@ def extra_items_repr(extra_keys, mapping, ab_side, kwargs): attrs_to_print = set(a_attrs) ^ set(b_attrs) attrs_to_print.update( - {k for k in set(a_attrs) & set(b_attrs) if a_attrs[k] != b_attrs[k]} + { + k + for k in set(a_attrs) & set(b_attrs) + if not compare_attr(a_attrs[k], b_attrs[k]) + } ) for m in (a_mapping, b_mapping): attr_s = "\n".join( diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 449d0c793cc..085b70e518b 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -192,10 +192,14 @@ def dimension_sizes( max_side=2, max_dims=2, ), - dtype=npst.scalar_dtypes(), + dtype=npst.scalar_dtypes() + | npst.byte_string_dtypes() + | npst.unicode_string_dtypes(), ) _attr_values = st.none() | st.booleans() | _readable_strings | _small_arrays +simple_attrs = st.dictionaries(_attr_keys, _attr_values) + def attrs() -> st.SearchStrategy[Mapping[Hashable, Any]]: """ diff --git a/xarray/tests/test_formatting.py b/xarray/tests/test_formatting.py index d7a46eeaefc..6c49ab456f6 100644 --- a/xarray/tests/test_formatting.py +++ b/xarray/tests/test_formatting.py @@ -399,6 +399,36 @@ def test_diff_attrs_repr_with_array(self) -> None: actual = formatting.diff_attrs_repr(attrs_a, attrs_c, "equals") assert expected == actual + def test__diff_mapping_repr_array_attrs_on_variables(self) -> None: + a = { + "a": xr.DataArray( + dims="x", + data=np.array([1], dtype="int16"), + attrs={"b": np.array([1, 2], dtype="int8")}, + ) + } + b = { + "a": xr.DataArray( + dims="x", + data=np.array([1], dtype="int16"), + attrs={"b": np.array([2, 3], dtype="int8")}, + ) + } + actual = formatting.diff_data_vars_repr(a, b, compat="identical", col_width=8) + expected = dedent( + """\ + Differing data variables: + L a (x) int16 2B 1 + Differing variable attributes: + b: [1 2] + R a (x) int16 2B 1 + Differing variable attributes: + b: [2 3] + """.rstrip() + ) + + assert actual == expected + def test_diff_dataset_repr(self) -> None: ds_a = xr.Dataset( data_vars={