Skip to content

Commit 59378cc

Browse files
mgunyho and dcherian authored
Raise exception in to_dataset if resulting variable is also the name of a coordinate (#8433)
* Add tests for issue #7823
* Use 2D array to properly test to_dataset error handling logic
* Raise exception if a variable is also a coordinate in to_dataset

  Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>
* Update whats-new
* Ensure that coordinates are in the original order

---------

Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>
1 parent f0ade3d commit 59378cc

File tree

3 files changed

+75
-4
lines changed

3 files changed

+75
-4
lines changed

doc/whats-new.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,9 @@ Bug fixes
7272
- Fix to once again support date offset strings as input to the loffset
7373
parameter of resample and test this functionality (:pull:`8422`, :issue:`8399`).
7474
By `Katelyn FitzGerald <https://github.com/kafitzgerald>`_.
75+
- Fix a bug where :py:meth:`DataArray.to_dataset` silently drops a variable
76+
if a coordinate with the same name already exists (:pull:`8433`, :issue:`7823`).
77+
By `András Gunyhó <https://github.com/mgunyho>`_.
7578

7679
Documentation
7780
~~~~~~~~~~~~~

xarray/core/dataarray.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -574,9 +574,24 @@ def subset(dim, label):
574574
array.attrs = {}
575575
return as_variable(array)
576576

577-
variables = {label: subset(dim, label) for label in self.get_index(dim)}
578-
variables.update({k: v for k, v in self._coords.items() if k != dim})
577+
variables_from_split = {
578+
label: subset(dim, label) for label in self.get_index(dim)
579+
}
579580
coord_names = set(self._coords) - {dim}
581+
582+
ambiguous_vars = set(variables_from_split) & coord_names
583+
if ambiguous_vars:
584+
rename_msg_fmt = ", ".join([f"{v}=..." for v in sorted(ambiguous_vars)])
585+
raise ValueError(
586+
f"Splitting along the dimension {dim!r} would produce the variables "
587+
f"{tuple(sorted(ambiguous_vars))} which are also existing coordinate "
588+
f"variables. Use DataArray.rename({rename_msg_fmt}) or "
589+
f"DataArray.assign_coords({dim}=...) to resolve this ambiguity."
590+
)
591+
592+
variables = variables_from_split | {
593+
k: v for k, v in self._coords.items() if k != dim
594+
}
580595
indexes = filter_indexes_from_coords(self._indexes, coord_names)
581596
dataset = Dataset._construct_direct(
582597
variables, coord_names, indexes=indexes, attrs=self.attrs

xarray/tests/test_dataarray.py

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3686,8 +3686,16 @@ def test_to_dataset_whole(self) -> None:
36863686
actual = named.to_dataset("bar")
36873687

36883688
def test_to_dataset_split(self) -> None:
3689-
array = DataArray([1, 2, 3], coords=[("x", list("abc"))], attrs={"a": 1})
3690-
expected = Dataset({"a": 1, "b": 2, "c": 3}, attrs={"a": 1})
3689+
array = DataArray(
3690+
[[1, 2], [3, 4], [5, 6]],
3691+
coords=[("x", list("abc")), ("y", [0.0, 0.1])],
3692+
attrs={"a": 1},
3693+
)
3694+
expected = Dataset(
3695+
{"a": ("y", [1, 2]), "b": ("y", [3, 4]), "c": ("y", [5, 6])},
3696+
coords={"y": [0.0, 0.1]},
3697+
attrs={"a": 1},
3698+
)
36913699
actual = array.to_dataset("x")
36923700
assert_identical(expected, actual)
36933701

@@ -3715,6 +3723,51 @@ def test_to_dataset_retains_keys(self) -> None:
37153723

37163724
assert_equal(array, result)
37173725

3726+
def test_to_dataset_coord_value_is_dim(self) -> None:
3727+
# github issue #7823
3728+
3729+
array = DataArray(
3730+
np.zeros((3, 3)),
3731+
coords={
3732+
# 'a' is both a coordinate value and the name of a coordinate
3733+
"x": ["a", "b", "c"],
3734+
"a": [1, 2, 3],
3735+
},
3736+
)
3737+
3738+
with pytest.raises(
3739+
ValueError,
3740+
match=(
3741+
re.escape("dimension 'x' would produce the variables ('a',)")
3742+
+ ".*"
3743+
+ re.escape("DataArray.rename(a=...) or DataArray.assign_coords(x=...)")
3744+
),
3745+
):
3746+
array.to_dataset("x")
3747+
3748+
# test error message formatting when there are multiple ambiguous
3749+
# values/coordinates
3750+
array2 = DataArray(
3751+
np.zeros((3, 3, 2)),
3752+
coords={
3753+
"x": ["a", "b", "c"],
3754+
"a": [1, 2, 3],
3755+
"b": [0.0, 0.1],
3756+
},
3757+
)
3758+
3759+
with pytest.raises(
3760+
ValueError,
3761+
match=(
3762+
re.escape("dimension 'x' would produce the variables ('a', 'b')")
3763+
+ ".*"
3764+
+ re.escape(
3765+
"DataArray.rename(a=..., b=...) or DataArray.assign_coords(x=...)"
3766+
)
3767+
),
3768+
):
3769+
array2.to_dataset("x")
3770+
37183771
def test__title_for_slice(self) -> None:
37193772
array = DataArray(
37203773
np.ones((4, 3, 2)),

0 commit comments

Comments
 (0)