Skip to content

Commit

Permalink
Merge branch 'main' into eval
Browse files Browse the repository at this point in the history
  • Loading branch information
max-sixty authored Dec 4, 2023
2 parents 875274d + 449c31a commit 3b493ae
Show file tree
Hide file tree
Showing 8 changed files with 93 additions and 48 deletions.
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,24 +18,24 @@ repos:
files: ^xarray/
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: 'v0.1.4'
rev: 'v0.1.6'
hooks:
- id: ruff
args: ["--fix"]
# https://github.com/python/black#version-control-integration
- repo: https://github.com/psf/black
rev: 23.10.1
rev: 23.11.0
hooks:
- id: black-jupyter
- repo: https://github.com/keewis/blackdoc
rev: v0.3.9
hooks:
- id: blackdoc
exclude: "generate_aggregations.py"
additional_dependencies: ["black==23.10.1"]
additional_dependencies: ["black==23.11.0"]
- id: blackdoc-autoupdate-black
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.6.1
rev: v1.7.1
hooks:
- id: mypy
# Copied from setup.cfg
Expand Down
3 changes: 3 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ New Features
- Add a :py:meth:`Dataset.eval` method, similar to the pandas' method of the
same name. (:pull:`7163`). This is currently marked as experimental and
doesn't yet support the ``numexpr`` engine.
- :py:meth:`Dataset.drop_vars` & :py:meth:`DataArray.drop_vars` now accept a
callable, which is called with the object and returns the name(s) to drop, similar
to :py:meth:`Dataset.where`, :py:meth:`Dataset.sortby` & others. (:pull:`8511`).
By `Maximilian Roos <https://github.com/max-sixty>`_.

Breaking changes
Expand Down
34 changes: 16 additions & 18 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ def _calc_assign_results(

def assign_coords(
self,
coords: Mapping[Any, Any] | None = None,
coords: Mapping | None = None,
**coords_kwargs: Any,
) -> Self:
"""Assign new coordinates to this object.
Expand All @@ -486,15 +486,21 @@ def assign_coords(
Parameters
----------
coords : dict-like or None, optional
A dict where the keys are the names of the coordinates
with the new values to assign. If the values are callable, they are
computed on this object and assigned to new coordinate variables.
If the values are not callable, (e.g. a ``DataArray``, scalar, or
array), they are simply assigned. A new coordinate can also be
defined and attached to an existing dimension using a tuple with
the first element the dimension name and the second element the
values for this new coordinate.
coords : mapping of dim to coord, optional
A mapping whose keys are the names of the coordinates and values are the
coordinates to assign. The mapping will generally be a dict or
:class:`Coordinates`.
* If a value is a standard data value — for example, a ``DataArray``,
scalar, or array — the data is simply assigned as a coordinate.
* If a value is callable, it is called with this object as the only
parameter, and the return value is used as new coordinate variables.
* A coordinate can also be defined and attached to an existing dimension
using a tuple with the first element the dimension name and the second
element the values for this new coordinate.
**coords_kwargs : optional
The keyword arguments form of ``coords``.
One of ``coords`` or ``coords_kwargs`` must be provided.
Expand Down Expand Up @@ -595,14 +601,6 @@ def assign_coords(
Attributes:
description: Weather-related data
Notes
-----
Since ``coords_kwargs`` is a dictionary, the order of your arguments
may not be preserved, and so the order of the new variables is not well
defined. Assigning multiple variables within the same ``assign_coords``
is possible, but you cannot reference other variables created within
the same ``assign_coords`` call.
See Also
--------
Dataset.assign
Expand Down
18 changes: 13 additions & 5 deletions xarray/core/coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,11 +571,18 @@ def assign(self, coords: Mapping | None = None, **coords_kwargs: Any) -> Self:
Parameters
----------
coords : :class:`Coordinates` or mapping of hashable to Any
Mapping from coordinate names to the new values. If a ``Coordinates``
object is passed, its indexes are assigned in the returned object.
Otherwise, a default (pandas) index is created for each dimension
coordinate found in the mapping.
coords : mapping of dim to coord, optional
A mapping whose keys are the names of the coordinates and values are the
coordinates to assign. The mapping will generally be a dict or
:class:`Coordinates`.
* If a value is a standard data value — for example, a ``DataArray``,
scalar, or array — the data is simply assigned as a coordinate.
* A coordinate can also be defined and attached to an existing dimension
using a tuple with the first element the dimension name and the second
element the values for this new coordinate.
**coords_kwargs
The keyword arguments form of ``coords``.
One of ``coords`` or ``coords_kwargs`` must be provided.
Expand Down Expand Up @@ -605,6 +612,7 @@ def assign(self, coords: Mapping | None = None, **coords_kwargs: Any) -> Self:
* y_level_1 (y) int64 0 1 0 1
"""
# TODO: this doesn't support a callable, which is inconsistent with `DataArray.assign_coords`
coords = either_dict_or_kwargs(coords, coords_kwargs, "assign")
new_coords = self.copy()
new_coords.update(coords)
Expand Down
17 changes: 14 additions & 3 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -3041,16 +3041,17 @@ def T(self) -> Self:

def drop_vars(
self,
names: Hashable | Iterable[Hashable],
names: str | Iterable[Hashable] | Callable[[Self], str | Iterable[Hashable]],
*,
errors: ErrorOptions = "raise",
) -> Self:
"""Returns an array with dropped variables.
Parameters
----------
names : Hashable or iterable of Hashable
Name(s) of variables to drop.
names : Hashable or iterable of Hashable or Callable
Name(s) of variables to drop. If a Callable, this object is passed as its
only argument and its result is used.
errors : {"raise", "ignore"}, default: "raise"
If 'raise', raises a ValueError if any of the variables
passed are not in the dataset. If 'ignore', any given names that are in the
Expand Down Expand Up @@ -3100,7 +3101,17 @@ def drop_vars(
[ 6, 7, 8],
[ 9, 10, 11]])
Dimensions without coordinates: x, y
>>> da.drop_vars(lambda x: x.coords)
<xarray.DataArray (x: 4, y: 3)>
array([[ 0, 1, 2],
[ 3, 4, 5],
[ 6, 7, 8],
[ 9, 10, 11]])
Dimensions without coordinates: x, y
"""
if callable(names):
names = names(self)
ds = self._to_temp_dataset().drop_vars(names, errors=errors)
return self._from_temp_dataset(ds)

Expand Down
49 changes: 33 additions & 16 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5742,16 +5742,17 @@ def _assert_all_in_dataset(

def drop_vars(
self,
names: Hashable | Iterable[Hashable],
names: str | Iterable[Hashable] | Callable[[Self], str | Iterable[Hashable]],
*,
errors: ErrorOptions = "raise",
) -> Self:
"""Drop variables from this dataset.
Parameters
----------
names : hashable or iterable of hashable
Name(s) of variables to drop.
names : Hashable or iterable of Hashable or Callable
Name(s) of variables to drop. If a Callable, this object is passed as its
only argument and its result is used.
errors : {"raise", "ignore"}, default: "raise"
If 'raise', raises a ValueError if any of the variables
passed are not in the dataset. If 'ignore', any given names that are in the
Expand Down Expand Up @@ -5793,7 +5794,7 @@ def drop_vars(
humidity (time, latitude, longitude) float64 65.0 63.8 58.2 59.6
wind_speed (time, latitude, longitude) float64 10.2 8.5 12.1 9.8
# Drop the 'humidity' variable
Drop the 'humidity' variable
>>> dataset.drop_vars(["humidity"])
<xarray.Dataset>
Expand All @@ -5806,7 +5807,7 @@ def drop_vars(
temperature (time, latitude, longitude) float64 25.5 26.3 27.1 28.0
wind_speed (time, latitude, longitude) float64 10.2 8.5 12.1 9.8
# Drop the 'humidity', 'temperature' variables
Drop the 'humidity', 'temperature' variables
>>> dataset.drop_vars(["humidity", "temperature"])
<xarray.Dataset>
Expand All @@ -5818,7 +5819,18 @@ def drop_vars(
Data variables:
wind_speed (time, latitude, longitude) float64 10.2 8.5 12.1 9.8
# Attempt to drop non-existent variable with errors="ignore"
Drop all indexes
>>> dataset.drop_vars(lambda x: x.indexes)
<xarray.Dataset>
Dimensions: (time: 1, latitude: 2, longitude: 2)
Dimensions without coordinates: time, latitude, longitude
Data variables:
temperature (time, latitude, longitude) float64 25.5 26.3 27.1 28.0
humidity (time, latitude, longitude) float64 65.0 63.8 58.2 59.6
wind_speed (time, latitude, longitude) float64 10.2 8.5 12.1 9.8
Attempt to drop non-existent variable with errors="ignore"
>>> dataset.drop_vars(["pressure"], errors="ignore")
<xarray.Dataset>
Expand All @@ -5832,7 +5844,7 @@ def drop_vars(
humidity (time, latitude, longitude) float64 65.0 63.8 58.2 59.6
wind_speed (time, latitude, longitude) float64 10.2 8.5 12.1 9.8
# Attempt to drop non-existent variable with errors="raise"
Attempt to drop non-existent variable with errors="raise"
>>> dataset.drop_vars(["pressure"], errors="raise")
Traceback (most recent call last):
Expand All @@ -5852,36 +5864,38 @@ def drop_vars(
DataArray.drop_vars
"""
if callable(names):
names = names(self)
# the Iterable check is required for mypy
if is_scalar(names) or not isinstance(names, Iterable):
names = {names}
names_set = {names}
else:
names = set(names)
names_set = set(names)
if errors == "raise":
self._assert_all_in_dataset(names)
self._assert_all_in_dataset(names_set)

# GH6505
other_names = set()
for var in names:
for var in names_set:
maybe_midx = self._indexes.get(var, None)
if isinstance(maybe_midx, PandasMultiIndex):
idx_coord_names = set(maybe_midx.index.names + [maybe_midx.dim])
idx_other_names = idx_coord_names - set(names)
idx_other_names = idx_coord_names - set(names_set)
other_names.update(idx_other_names)
if other_names:
names |= set(other_names)
names_set |= set(other_names)
warnings.warn(
f"Deleting a single level of a MultiIndex is deprecated. Previously, this deleted all levels of a MultiIndex. "
f"Please also drop the following variables: {other_names!r} to avoid an error in the future.",
DeprecationWarning,
stacklevel=2,
)

assert_no_index_corrupted(self.xindexes, names)
assert_no_index_corrupted(self.xindexes, names_set)

variables = {k: v for k, v in self._variables.items() if k not in names}
variables = {k: v for k, v in self._variables.items() if k not in names_set}
coord_names = {k for k in self._coord_names if k in variables}
indexes = {k: v for k, v in self._indexes.items() if k not in names}
indexes = {k: v for k, v in self._indexes.items() if k not in names_set}
return self._replace_with_new_dims(
variables, coord_names=coord_names, indexes=indexes
)
Expand Down Expand Up @@ -5979,6 +5993,9 @@ def drop(
"dropping variables using `drop` is deprecated; use drop_vars.",
DeprecationWarning,
)
# for mypy
if is_scalar(labels):
labels = [labels]
return self.drop_vars(labels, errors=errors)
if dim is not None:
warnings.warn(
Expand Down
4 changes: 2 additions & 2 deletions xarray/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def _drop_coords(self) -> T_Xarray:
obj = self._obj
for k, v in obj.coords.items():
if k != self._dim and self._dim in v.dims:
obj = obj.drop_vars(k)
obj = obj.drop_vars([k])
return obj

def pad(self, tolerance: float | Iterable[float] | None = None) -> T_Xarray:
Expand Down Expand Up @@ -244,7 +244,7 @@ def map(
# dimension, then we need to do so before we can rename the proxy
# dimension we used.
if self._dim in combined.coords:
combined = combined.drop_vars(self._dim)
combined = combined.drop_vars([self._dim])

if RESAMPLE_DIM in combined.dims:
combined = combined.rename({RESAMPLE_DIM: self._dim})
Expand Down
8 changes: 8 additions & 0 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -2652,6 +2652,14 @@ def test_drop_coordinates(self) -> None:
actual = renamed.drop_vars("foo", errors="ignore")
assert_identical(actual, renamed)

def test_drop_vars_callable(self) -> None:
    """Check that ``drop_vars`` accepts a callable.

    Passing a callable that returns the array's indexes should give the
    same result as dropping the "x" and "y" variables by name.
    """
    arr = DataArray(
        np.random.randn(2, 3),
        dims=["x", "y"],
        coords={"x": [1, 2], "y": [3, 4, 5]},
    )
    # The two calls are independent of each other, so their order is immaterial.
    via_callable = arr.drop_vars(lambda obj: obj.indexes)
    via_names = arr.drop_vars(["x", "y"])
    assert_identical(via_names, via_callable)

def test_drop_multiindex_level(self) -> None:
# GH6505
expected = self.mda.drop_vars(["x", "level_1", "level_2"])
Expand Down

0 comments on commit 3b493ae

Please sign in to comment.