From 97b5aa93644088c33f9481d34edddbd9075caf76 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Tue, 30 Jul 2024 09:16:31 -0400 Subject: [PATCH 1/5] regression test --- xarray/tests/test_datatree.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index 31d77ca17e7..20a7915feff 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -561,6 +561,24 @@ def test_roundtrip_unnamed_root(self, simple_datatree): roundtrip = DataTree.from_dict(dt.to_dict()) assert roundtrip.equals(dt) + def test_insertion_order(self): + # regression test for GH issue #9276 + reversed = DataTree.from_dict( + { + "/Homer/Bart": xr.Dataset({"age": 10}), + "/Homer": xr.Dataset({"age": 39}), + "/": xr.Dataset({"age": 83}), + } + ) + expected = DataTree.from_dict( + { + "/": xr.Dataset({"age": 83}), + "/Homer": xr.Dataset({"age": 39}), + "/Homer/Bart": xr.Dataset({"age": 10}), + } + ) + assert reversed.equals(expected) + class TestDatasetView: def test_view_contents(self): From f32d262cda3d196fb0652229d5706e7edbc6f17c Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Tue, 30 Jul 2024 09:16:46 -0400 Subject: [PATCH 2/5] fix by sorting before insertion --- xarray/core/datatree.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index 65ff8667cb7..b73e1a84a21 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -1104,7 +1104,8 @@ def from_dict( if d: # Populate tree with children determined from data_objects mapping - for path, data in d.items(): + # Sort keys so as to insert nodes from root first (see GH issue #9276) + for path, data in sorted(d.items()): # Create and set new node node_name = NodePath(path).name if isinstance(data, DataTree): From d41a78f453b6d1544fd35294008ad9683b8ebba4 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Tue, 30 Jul 2024 09:24:26 -0400 Subject: [PATCH 3/5] whatsnew --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index e959ec111f5..0d146a7fd0d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -35,6 +35,8 @@ Deprecations Bug fixes ~~~~~~~~~ +- Fix bug causing `DataTree.from_dict` to be sensitive to insertion order (:issue:`9276`, :pull:`9292`). + By `Tom Nicholas `_. Documentation ~~~~~~~~~~~~~ From 3e06608210b96d89b0c70931a8cc00e03a8662c3 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Wed, 31 Jul 2024 02:39:48 -0400 Subject: [PATCH 4/5] test nodes retain insertion order within a group --- xarray/tests/test_datatree.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index 20a7915feff..c875322b9c5 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -565,6 +565,7 @@ def test_insertion_order(self): # regression test for GH issue #9276 reversed = DataTree.from_dict( { + "/Homer/Lisa": xr.Dataset({"age": 8}), "/Homer/Bart": xr.Dataset({"age": 10}), "/Homer": xr.Dataset({"age": 39}), "/": xr.Dataset({"age": 83}), @@ -574,11 +575,16 @@ def test_insertion_order(self): { "/": xr.Dataset({"age": 83}), "/Homer": xr.Dataset({"age": 39}), + "/Homer/Lisa": xr.Dataset({"age": 8}), "/Homer/Bart": xr.Dataset({"age": 10}), } ) assert reversed.equals(expected) + # Check that Bart and Lisa's order is still preserved within the group, + # despite 'Bart' coming before 'Lisa' when sorted alphabetically + assert list(reversed["Homer"].children.keys()) == ["Lisa", "Bart"] + class TestDatasetView: def test_view_contents(self): From 8a59da47eea639d9ec04af062cedd8bb0db1ff21 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Wed, 31 Jul 2024 02:40:06 -0400 Subject: [PATCH 5/5] make test pass by sorting by depth --- xarray/core/datatree.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index b73e1a84a21..f8a8a5fbc18 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -1102,10 +1102,14 @@ def from_dict( else: obj = cls(name=name, data=root_data, parent=None, children=None) + def depth(item) -> int: + pathstr, _ = item + return len(NodePath(pathstr).parts) + if d: # Populate tree with children determined from data_objects mapping - # Sort keys so as to insert nodes from root first (see GH issue #9276) - for path, data in sorted(d.items()): + # Sort keys by depth so as to insert nodes from root first (see GH issue #9276) + for path, data in sorted(d.items(), key=depth): # Create and set new node node_name = NodePath(path).name if isinstance(data, DataTree):