Skip to content

Commit

Permalink
Merge branch 'main' into named-array
Browse files Browse the repository at this point in the history
  • Loading branch information
andersy005 authored Aug 30, 2023
2 parents c55f35a + afda88e commit 2335bba
Show file tree
Hide file tree
Showing 12 changed files with 289 additions and 114 deletions.
34 changes: 28 additions & 6 deletions .codecov.yml
Original file line number Diff line number Diff line change
@@ -1,16 +1,38 @@
codecov:
ci:
# by default, codecov doesn't recognize azure as a CI provider
- dev.azure.com
require_ci_to_pass: yes
require_ci_to_pass: true

coverage:
status:
project:
default:
# Require 1% coverage, i.e., always succeed
target: 1
target: 1%
flags:
- unittests
paths:
- "!xarray/tests/"
unittests:
target: 90%
flags:
- unittests
paths:
- "!xarray/tests/"
mypy:
target: 20%
flags:
- mypy
patch: false
changes: false

comment: off
comment: false

flags:
unittests:
paths:
- "xarray"
- "!xarray/tests"
carryforward: false
mypy:
paths:
- "xarray"
carryforward: false
4 changes: 2 additions & 2 deletions .github/workflows/ci-additional.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ jobs:
- name: Run mypy
run: |
python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report
python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report xarray/
- name: Upload mypy coverage to Codecov
uses: codecov/codecov-action@v3.1.4
Expand Down Expand Up @@ -177,7 +177,7 @@ jobs:
- name: Run mypy
run: |
python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report
python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report xarray/
- name: Upload mypy coverage to Codecov
uses: codecov/codecov-action@v3.1.4
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# xarray: N-D labeled arrays and datasets

[![CI](https://github.com/pydata/xarray/workflows/CI/badge.svg?branch=main)](https://github.com/pydata/xarray/actions?query=workflow%3ACI)
[![Code coverage](https://codecov.io/gh/pydata/xarray/branch/main/graph/badge.svg)](https://codecov.io/gh/pydata/xarray)
[![Code coverage](https://codecov.io/gh/pydata/xarray/branch/main/graph/badge.svg?flag=unittests)](https://codecov.io/gh/pydata/xarray)
[![Docs](https://readthedocs.org/projects/xray/badge/?version=latest)](https://docs.xarray.dev/)
[![Benchmarked with asv](https://img.shields.io/badge/benchmarked%20by-asv-green.svg?style=flat)](https://pandas.pydata.org/speed/xarray/)
[![Available on pypi](https://img.shields.io/pypi/v/xarray.svg)](https://pypi.python.org/pypi/xarray/)
Expand Down
15 changes: 13 additions & 2 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ What's New
np.random.seed(123456)
.. _whats-new.2023.08.1:

v2023.08.1 (unreleased)
Expand All @@ -31,10 +30,23 @@ Breaking changes
Deprecations
~~~~~~~~~~~~

- Deprecate passing a :py:class:`pandas.MultiIndex` object directly to the
:py:class:`Dataset` and :py:class:`DataArray` constructors as well as to
:py:meth:`Dataset.assign` and :py:meth:`Dataset.assign_coords`.
A new Xarray :py:class:`Coordinates` object has to be created first using
:py:meth:`Coordinates.from_pandas_multiindex` (:pull:`8094`).
By `Benoît Bovy <https://github.com/benbovy>`_.

Bug fixes
~~~~~~~~~

- Improved handling of multi-coordinate indexes when updating coordinates, including bug fixes
(and improved warnings for deprecated features) for pandas multi-indexes (:pull:`8094`).
By `Benoît Bovy <https://github.com/benbovy>`_.
- Fixed a bug in :py:func:`merge` with ``compat='minimal'`` where the coordinate
names were not updated properly internally (:issue:`7405`, :issue:`7588`,
:pull:`8104`).
By `Benoît Bovy <https://github.com/benbovy>`_.

Documentation
~~~~~~~~~~~~~
Expand Down Expand Up @@ -120,7 +132,6 @@ Breaking changes
numbagg 0.1 0.2.1
===================== ========= ========


Documentation
~~~~~~~~~~~~~

Expand Down
193 changes: 115 additions & 78 deletions xarray/core/coordinates.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

import warnings
from collections.abc import Hashable, Iterator, Mapping, Sequence
from contextlib import contextmanager
from typing import (
Expand All @@ -24,7 +23,7 @@
)
from xarray.core.merge import merge_coordinates_without_align, merge_coords
from xarray.core.types import Self, T_DataArray
from xarray.core.utils import Frozen, ReprObject
from xarray.core.utils import Frozen, ReprObject, emit_user_level_warning
from xarray.core.variable import Variable, as_variable, calculate_dimensions

if TYPE_CHECKING:
Expand Down Expand Up @@ -83,7 +82,7 @@ def variables(self):
def _update_coords(self, coords, indexes):
raise NotImplementedError()

def _maybe_drop_multiindex_coords(self, coords):
def _drop_coords(self, coord_names):
raise NotImplementedError()

def __iter__(self) -> Iterator[Hashable]:
Expand Down Expand Up @@ -379,9 +378,9 @@ def _update_coords(
# redirect to DatasetCoordinates._update_coords
self._data.coords._update_coords(coords, indexes)

def _maybe_drop_multiindex_coords(self, coords: set[Hashable]) -> None:
# redirect to DatasetCoordinates._maybe_drop_multiindex_coords
self._data.coords._maybe_drop_multiindex_coords(coords)
def _drop_coords(self, coord_names):
# redirect to DatasetCoordinates._drop_coords
self._data.coords._drop_coords(coord_names)

def _merge_raw(self, other, reflexive):
"""For use with binary arithmetic."""
Expand Down Expand Up @@ -454,22 +453,40 @@ def __setitem__(self, key: Hashable, value: Any) -> None:

def update(self, other: Mapping[Any, Any]) -> None:
"""Update this Coordinates variables with other coordinate variables."""
other_obj: Coordinates | Mapping[Hashable, Variable]

if not len(other):
return

other_coords: Coordinates

if isinstance(other, Coordinates):
# special case: default indexes won't be created
other_obj = other
# Coordinates object: just pass it (default indexes won't be created)
other_coords = other
else:
other_obj = getattr(other, "variables", other)
other_coords = create_coords_with_default_indexes(
getattr(other, "variables", other)
)

self._maybe_drop_multiindex_coords(set(other_obj))
# Discard original indexed coordinates prior to merge allows to:
# - fail early if the new coordinates don't preserve the integrity of existing
# multi-coordinate indexes
# - drop & replace coordinates without alignment (note: we must keep indexed
# coordinates extracted from the DataArray objects passed as values to
# `other` - if any - as those are still used for aligning the old/new coordinates)
coords_to_align = drop_indexed_coords(set(other_coords) & set(other), self)

coords, indexes = merge_coords(
[self.variables, other_obj],
[coords_to_align, other_coords],
priority_arg=1,
indexes=self.xindexes,
indexes=coords_to_align.xindexes,
)

# special case for PandasMultiIndex: updating only its dimension coordinate
# is still allowed but depreciated.
# It is the only case where we need to actually drop coordinates here (multi-index levels)
# TODO: remove when removing PandasMultiIndex's dimension coordinate.
self._drop_coords(self._names - coords_to_align._names)

self._update_coords(coords, indexes)

def _overwrite_indexes(
Expand Down Expand Up @@ -610,15 +627,20 @@ def _update_coords(
original_indexes.update(indexes)
self._data._indexes = original_indexes

def _maybe_drop_multiindex_coords(self, coords: set[Hashable]) -> None:
"""Drops variables in coords, and any associated variables as well."""
def _drop_coords(self, coord_names):
# should drop indexed coordinates only
for name in coord_names:
del self._data._variables[name]
del self._data._indexes[name]
self._data._coord_names.difference_update(coord_names)

def _drop_indexed_coords(self, coords_to_drop: set[Hashable]) -> None:
assert self._data.xindexes is not None
variables, indexes = drop_coords(
coords, self._data._variables, self._data.xindexes
)
self._data._coord_names.intersection_update(variables)
self._data._variables = variables
self._data._indexes = indexes
new_coords = drop_indexed_coords(coords_to_drop, self)
for name in self._data._coord_names - new_coords._names:
del self._data._variables[name]
self._data._indexes = dict(new_coords.xindexes)
self._data._coord_names.intersection_update(new_coords._names)

def __delitem__(self, key: Hashable) -> None:
if key in self:
Expand Down Expand Up @@ -691,13 +713,11 @@ def _update_coords(
original_indexes.update(indexes)
self._data._indexes = original_indexes

def _maybe_drop_multiindex_coords(self, coords: set[Hashable]) -> None:
"""Drops variables in coords, and any associated variables as well."""
variables, indexes = drop_coords(
coords, self._data._coords, self._data.xindexes
)
self._data._coords = variables
self._data._indexes = indexes
def _drop_coords(self, coord_names):
# should drop indexed coordinates only
for name in coord_names:
del self._data._coords[name]
del self._data._indexes[name]

@property
def variables(self):
Expand All @@ -724,35 +744,48 @@ def _ipython_key_completions_(self):
return self._data._ipython_key_completions_()


def drop_coords(
coords_to_drop: set[Hashable], variables, indexes: Indexes
) -> tuple[dict, dict]:
"""Drop index variables associated with variables in coords_to_drop."""
# Only warn when we're dropping the dimension with the multi-indexed coordinate
# If asked to drop a subset of the levels in a multi-index, we raise an error
# later but skip the warning here.
new_variables = dict(variables.copy())
new_indexes = dict(indexes.copy())
for key in coords_to_drop & set(indexes):
maybe_midx = indexes[key]
idx_coord_names = set(indexes.get_all_coords(key))
if (
isinstance(maybe_midx, PandasMultiIndex)
and key == maybe_midx.dim
and (idx_coord_names - coords_to_drop)
):
warnings.warn(
f"Updating MultiIndexed coordinate {key!r} would corrupt indices for "
f"other variables: {list(maybe_midx.index.names)!r}. "
f"This will raise an error in the future. Use `.drop_vars({idx_coord_names!r})` before "
def drop_indexed_coords(
coords_to_drop: set[Hashable], coords: Coordinates
) -> Coordinates:
"""Drop indexed coordinates associated with coordinates in coords_to_drop.
This will raise an error in case it corrupts any passed index and its
coordinate variables.
"""
new_variables = dict(coords.variables)
new_indexes = dict(coords.xindexes)

for idx, idx_coords in coords.xindexes.group_by_index():
idx_drop_coords = set(idx_coords) & coords_to_drop

# special case for pandas multi-index: still allow but deprecate
# dropping only its dimension coordinate.
# TODO: remove when removing PandasMultiIndex's dimension coordinate.
if isinstance(idx, PandasMultiIndex) and idx_drop_coords == {idx.dim}:
idx_drop_coords.update(idx.index.names)
emit_user_level_warning(
f"updating coordinate {idx.dim!r} with a PandasMultiIndex would leave "
f"the multi-index level coordinates {list(idx.index.names)!r} in an inconsistent state. "
f"This will raise an error in the future. Use `.drop_vars({list(idx_coords)!r})` before "
"assigning new coordinate values.",
FutureWarning,
stacklevel=4,
)
for k in idx_coord_names:
del new_variables[k]
del new_indexes[k]
return new_variables, new_indexes

elif idx_drop_coords and len(idx_drop_coords) != len(idx_coords):
idx_drop_coords_str = ", ".join(f"{k!r}" for k in idx_drop_coords)
idx_coords_str = ", ".join(f"{k!r}" for k in idx_coords)
raise ValueError(
f"cannot drop or update coordinate(s) {idx_drop_coords_str}, which would corrupt "
f"the following index built from coordinates {idx_coords_str}:\n"
f"{idx}"
)

for k in idx_drop_coords:
del new_variables[k]
del new_indexes[k]

return Coordinates._construct_direct(coords=new_variables, indexes=new_indexes)


def assert_coordinate_consistent(
Expand All @@ -773,11 +806,15 @@ def assert_coordinate_consistent(


def create_coords_with_default_indexes(
coords: Mapping[Any, Any], data_vars: Mapping[Any, Variable] | None = None
coords: Mapping[Any, Any], data_vars: Mapping[Any, Any] | None = None
) -> Coordinates:
"""Maybe create default indexes from a mapping of coordinates."""
"""Returns a Coordinates object from a mapping of coordinates (arbitrary objects).
Create default (pandas) indexes for each of the input dimension coordinates.
Extract coordinates from each input DataArray.
# Note: data_vars are needed here only because a pd.MultiIndex object
"""
# Note: data_vars is needed here only because a pd.MultiIndex object
# can be promoted as coordinates.
# TODO: It won't be relevant anymore when this behavior will be dropped
# in favor of the more explicit ``Coordinates.from_pandas_multiindex()``.
Expand All @@ -791,34 +828,34 @@ def create_coords_with_default_indexes(
indexes: dict[Hashable, Index] = {}
variables: dict[Hashable, Variable] = {}

maybe_index_vars: dict[Hashable, Variable] = {}
mindex_data_vars: list[Hashable] = []
# promote any pandas multi-index in data_vars as coordinates
coords_promoted: dict[Hashable, Any] = {}
pd_mindex_keys: list[Hashable] = []

for k, v in all_variables.items():
if k in coords:
maybe_index_vars[k] = v
elif isinstance(v, pd.MultiIndex):
# TODO: eventually stop promoting multi-index passed via data variables
mindex_data_vars.append(k)
maybe_index_vars[k] = v

if mindex_data_vars:
warnings.warn(
f"passing one or more `pandas.MultiIndex` via data variable(s) {mindex_data_vars} "
"will no longer create indexed coordinates in the future. "
"If you want to keep this behavior, pass it as coordinates instead.",
if isinstance(v, pd.MultiIndex):
coords_promoted[k] = v
pd_mindex_keys.append(k)
elif k in coords:
coords_promoted[k] = v

if pd_mindex_keys:
pd_mindex_keys_fmt = ",".join([f"'{k}'" for k in pd_mindex_keys])
emit_user_level_warning(
f"the `pandas.MultiIndex` object(s) passed as {pd_mindex_keys_fmt} coordinate(s) or "
"data variable(s) will no longer be implicitly promoted and wrapped into "
"multiple indexed coordinates in the future "
"(i.e., one coordinate for each multi-index level + one dimension coordinate). "
"If you want to keep this behavior, you need to first wrap it explicitly using "
"`mindex_coords = xarray.Coordinates.from_pandas_multiindex(mindex_obj, 'dim')` "
"and pass it as coordinates, e.g., `xarray.Dataset(coords=mindex_coords)`, "
"`dataset.assign_coords(mindex_coords)` or `dataarray.assign_coords(mindex_coords)`.",
FutureWarning,
)

maybe_index_vars = {
k: v
for k, v in all_variables.items()
if k in coords or isinstance(v, pd.MultiIndex)
}

dataarray_coords: list[DataArrayCoordinates] = []

for name, obj in maybe_index_vars.items():
for name, obj in coords_promoted.items():
if isinstance(obj, DataArray):
dataarray_coords.append(obj.coords)

Expand Down
Loading

0 comments on commit 2335bba

Please sign in to comment.