From 81098cff9d011d3971923f099f2c081340c905fd Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 16 Aug 2023 15:18:17 -0700 Subject: [PATCH 01/67] initial prototype for NamedArray --- xarray/core/variable.py | 249 ++-------------------------- xarray/named_array/__init__.py | 0 xarray/named_array/core.py | 285 +++++++++++++++++++++++++++++++++ 3 files changed, 299 insertions(+), 235 deletions(-) create mode 100644 xarray/named_array/__init__.py create mode 100644 xarray/named_array/core.py diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 79debe3a952..0581bb93121 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -25,10 +25,7 @@ as_indexable, ) from xarray.core.options import OPTIONS, _get_keep_attrs -from xarray.core.parallelcompat import ( - get_chunked_array_type, - guess_chunkmanager, -) +from xarray.core.parallelcompat import get_chunked_array_type, guess_chunkmanager from xarray.core.pycompat import ( array_type, integer_types, @@ -37,7 +34,6 @@ is_duck_dask_array, ) from xarray.core.utils import ( - Frozen, NdimSizeLenMixin, OrderedSet, _default, @@ -49,6 +45,7 @@ is_duck_array, maybe_coerce_to_str, ) +from xarray.named_array.core import NamedArray NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( indexing.ExplicitlyIndexed, @@ -311,7 +308,7 @@ def _as_array_or_item(data): return data -class Variable(AbstractArray, NdimSizeLenMixin, VariableArithmetic): +class Variable(NamedArray, AbstractArray, NdimSizeLenMixin, VariableArithmetic): """A netcdf-like variable consisting of dimensions, data and attributes which describe a single Array. A single Variable object is not fully described outside the context of its parent Dataset (if you want such a @@ -363,42 +360,6 @@ def __init__(self, dims, data, attrs=None, encoding=None, fastpath=False): if encoding is not None: self.encoding = encoding - @property - def dtype(self) -> np.dtype: - """ - Data-type of the array’s elements. - - See Also - -------- - ndarray.dtype - numpy.dtype - """ - return self._data.dtype - - @property - def shape(self) -> tuple[int, ...]: - """ - Tuple of array dimensions. - - See Also - -------- - numpy.ndarray.shape - """ - return self._data.shape - - @property - def nbytes(self) -> int: - """ - Total bytes consumed by the elements of the data array. - - If the underlying data array does not include ``nbytes``, estimates - the bytes consumed based on the ``size`` and ``dtype``. - """ - if hasattr(self._data, "nbytes"): - return self._data.nbytes - else: - return self.size * self.dtype.itemsize - @property def _in_memory(self): return isinstance( @@ -560,41 +521,6 @@ def compute(self, **kwargs): new = self.copy(deep=False) return new.load(**kwargs) - def __dask_tokenize__(self): - # Use v.data, instead of v._data, in order to cope with the wrappers - # around NetCDF and the like - from dask.base import normalize_token - - return normalize_token((type(self), self._dims, self.data, self._attrs)) - - def __dask_graph__(self): - if is_duck_dask_array(self._data): - return self._data.__dask_graph__() - else: - return None - - def __dask_keys__(self): - return self._data.__dask_keys__() - - def __dask_layers__(self): - return self._data.__dask_layers__() - - @property - def __dask_optimize__(self): - return self._data.__dask_optimize__ - - @property - def __dask_scheduler__(self): - return self._data.__dask_scheduler__ - - def __dask_postcompute__(self): - array_func, array_args = self._data.__dask_postcompute__() - return self._dask_finalize, (array_func,) + array_args - - def __dask_postpersist__(self): - array_func, array_args = self._data.__dask_postpersist__() - return self._dask_finalize, (array_func,) + array_args - def _dask_finalize(self, results, array_func, *args, **kwargs): data = array_func(results, *args, **kwargs) return Variable(self._dims, data, attrs=self._attrs, encoding=self._encoding) @@ -656,27 +582,6 @@ def to_dict( return item - @property - def dims(self) -> tuple[Hashable, ...]: - """Tuple of dimension names with which this variable is associated.""" - return self._dims - - @dims.setter - def dims(self, value: str | Iterable[Hashable]) -> None: - self._dims = self._parse_dimensions(value) - - def _parse_dimensions(self, dims: str | Iterable[Hashable]) -> tuple[Hashable, ...]: - if isinstance(dims, str): - dims = (dims,) - else: - dims = tuple(dims) - if len(dims) != self.ndim: - raise ValueError( - f"dimensions {dims} must have the same length as the " - f"number of data dimensions, ndim={self.ndim}" - ) - return dims - def _item_key_to_tuple(self, key): if utils.is_dict_like(key): return tuple(key.get(dim, slice(None)) for dim in self.dims) @@ -758,18 +663,18 @@ def _validate_indexers(self, key): if k.ndim > 1: raise IndexError( "Unlabeled multi-dimensional array cannot be " - "used for indexing: {}".format(k) + f"used for indexing: {k}" ) if k.dtype.kind == "b": if self.shape[self.get_axis_num(dim)] != len(k): raise IndexError( - "Boolean array size {:d} is used to index array " - "with shape {:s}.".format(len(k), str(self.shape)) + f"Boolean array size {len(k):d} is used to index array " + f"with shape {str(self.shape):s}." ) if k.ndim > 1: raise IndexError( - "{}-dimensional boolean indexing is " - "not supported. ".format(k.ndim) + f"{k.ndim}-dimensional boolean indexing is " + "not supported. " ) if is_duck_dask_array(k.data): raise KeyError( @@ -782,9 +687,7 @@ def _validate_indexers(self, key): raise IndexError( "Boolean indexer should be unlabeled or on the " "same dimension to the indexed array. Indexer is " - "on {:s} but the target dimension is {:s}.".format( - str(k.dims), dim - ) + f"on {str(k.dims):s} but the target dimension is {dim:s}." ) def _broadcast_indexes_outer(self, key): @@ -967,17 +870,6 @@ def __setitem__(self, key, value): indexable = as_indexable(self._data) indexable[index_tuple] = value - @property - def attrs(self) -> dict[Any, Any]: - """Dictionary of local attributes on this variable.""" - if self._attrs is None: - self._attrs = {} - return self._attrs - - @attrs.setter - def attrs(self, value: Mapping[Any, Any]) -> None: - self._attrs = dict(value) - @property def encoding(self) -> dict[Any, Any]: """Dictionary of encodings on this variable.""" @@ -996,66 +888,6 @@ def reset_encoding(self: T_Variable) -> T_Variable: """Return a new Variable without encoding.""" return self._replace(encoding={}) - def copy( - self: T_Variable, deep: bool = True, data: ArrayLike | None = None - ) -> T_Variable: - """Returns a copy of this object. - - If `deep=True`, the data array is loaded into memory and copied onto - the new object. Dimensions, attributes and encodings are always copied. - - Use `data` to create a new object with the same structure as - original but entirely new data. - - Parameters - ---------- - deep : bool, default: True - Whether the data array is loaded into memory and copied onto - the new object. Default is True. - data : array_like, optional - Data to use in the new object. Must have same shape as original. - When `data` is used, `deep` is ignored. - - Returns - ------- - object : Variable - New object with dimensions, attributes, encodings, and optionally - data copied from original. - - Examples - -------- - Shallow copy versus deep copy - - >>> var = xr.Variable(data=[1, 2, 3], dims="x") - >>> var.copy() - - array([1, 2, 3]) - >>> var_0 = var.copy(deep=False) - >>> var_0[0] = 7 - >>> var_0 - - array([7, 2, 3]) - >>> var - - array([7, 2, 3]) - - Changing the data using the ``data`` argument maintains the - structure of the original object, but with the new data. Original - object is unaffected. - - >>> var.copy(data=[0.1, 0.2, 0.3]) - - array([0.1, 0.2, 0.3]) - >>> var - - array([7, 2, 3]) - - See Also - -------- - pandas.DataFrame.copy - """ - return self._copy(deep=deep, data=data) - def _copy( self: T_Variable, deep: bool = True, @@ -1094,64 +926,11 @@ def _replace( attrs=_default, encoding=_default, ) -> T_Variable: - if dims is _default: - dims = copy.copy(self._dims) - if data is _default: - data = copy.copy(self.data) - if attrs is _default: - attrs = copy.copy(self._attrs) + new_object = super()._replace(dims, data, attrs) if encoding is _default: encoding = copy.copy(self._encoding) - return type(self)(dims, data, attrs, encoding, fastpath=True) - - def __copy__(self: T_Variable) -> T_Variable: - return self._copy(deep=False) - - def __deepcopy__( - self: T_Variable, memo: dict[int, Any] | None = None - ) -> T_Variable: - return self._copy(deep=True, memo=memo) - - # mutable objects should not be hashable - # https://github.com/python/mypy/issues/4266 - __hash__ = None # type: ignore[assignment] - - @property - def chunks(self) -> tuple[tuple[int, ...], ...] | None: - """ - Tuple of block lengths for this dataarray's data, in order of dimensions, or None if - the underlying data is not a dask array. - - See Also - -------- - Variable.chunk - Variable.chunksizes - xarray.unify_chunks - """ - return getattr(self._data, "chunks", None) - - @property - def chunksizes(self) -> Mapping[Any, tuple[int, ...]]: - """ - Mapping from dimension names to block lengths for this variable's data, or None if - the underlying data is not a dask array. - Cannot be modified directly, but can be modified by calling .chunk(). - - Differs from variable.chunks because it returns a mapping of dimensions to chunk shapes - instead of a tuple of chunk shapes. - - See Also - -------- - Variable.chunk - Variable.chunks - xarray.unify_chunks - """ - if hasattr(self._data, "chunks"): - return Frozen({dim: c for dim, c in zip(self.dims, self.data.chunks)}) - else: - return {} - - _array_counter = itertools.count() + new_object._encoding = encoding + return new_object def chunk( self, @@ -2549,8 +2328,8 @@ def coarsen_reshape(self, windows, boundary, side): variable = variable.pad(pad_width, mode="constant") else: raise TypeError( - "{} is invalid for boundary. Valid option is 'exact', " - "'trim' and 'pad'".format(boundary[d]) + f"{boundary[d]} is invalid for boundary. Valid option is 'exact', " + "'trim' and 'pad'" ) shape = [] diff --git a/xarray/named_array/__init__.py b/xarray/named_array/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/xarray/named_array/core.py b/xarray/named_array/core.py new file mode 100644 index 00000000000..28e392f3d4d --- /dev/null +++ b/xarray/named_array/core.py @@ -0,0 +1,285 @@ +import copy +import itertools +import typing + +import numpy as np +import numpy.typing as npt + +from xarray.core.pycompat import is_duck_dask_array +from xarray.core.utils import Frozen, _default + + +class NamedArray: + __slots__ = ("_dims", "_data", "_attrs") + + def __init__( + self, dims, data: npt.ArrayLike, attrs: dict[typing.Any, typing.Any] = None + ): + self._dims = self._parse_dimensions(dims) + self._data = data + self._attrs = attrs or {} + + @property + def dtype(self) -> np.dtype: + """ + Data-type of the array’s elements. + + See Also + -------- + ndarray.dtype + numpy.dtype + """ + return self._data.dtype + + @property + def shape(self) -> tuple[int, ...]: + """ + Tuple of array dimensions. + + See Also + -------- + numpy.ndarray.shape + """ + return self._data.shape + + @property + def nbytes(self) -> int: + """ + Total bytes consumed by the elements of the data array. + + If the underlying data array does not include ``nbytes``, estimates + the bytes consumed based on the ``size`` and ``dtype``. + """ + if hasattr(self._data, "nbytes"): + return self._data.nbytes + else: + return self.size * self.dtype.itemsize + + @property + def dims(self) -> tuple[typing.Hashable, ...]: + """Tuple of dimension names with which this variable is associated.""" + return self._dims + + @dims.setter + def dims(self, value: str | typing.Iterable[typing.Hashable]) -> None: + self._dims = self._parse_dimensions(value) + + def _parse_dimensions( + self, dims: str | typing.Iterable[typing.Hashable] + ) -> tuple[typing.Hashable, ...]: + dims = (dims,) if isinstance(dims, str) else tuple(dims) + if len(dims) != self.ndim: + raise ValueError( + f"dimensions {dims} must have the same length as the " + f"number of data dimensions, ndim={self.ndim}" + ) + return dims + + @property + def attrs(self) -> dict[typing.Any, typing.Any]: + """Dictionary of local attributes on this variable.""" + if self._attrs is None: + self._attrs = {} + return self._attrs + + @attrs.setter + def attrs(self, value: typing.Mapping[typing.Any, typing.Any]) -> None: + self._attrs = dict(value) + + @property + def data(self) -> typing.Any: + """ + The Variable's data as an array. The underlying array type + (e.g. dask, sparse, pint) is preserved. + + See Also + -------- + Variable.to_numpy + Variable.as_numpy + Variable.values + """ + + return self._data + + @data.setter + def data(self, data): + if data.shape != self.shape: + raise ValueError( + f"replacement data must match the Variable's shape. " + f"replacement data has shape {data.shape}; Variable has shape {self.shape}" + ) + self._data = data + + def __dask_tokenize__(self): + # Use v.data, instead of v._data, in order to cope with the wrappers + # around NetCDF and the like + from dask.base import normalize_token + + return normalize_token((type(self), self._dims, self.data, self._attrs)) + + def __dask_graph__(self): + return self._data.__dask_graph__() if is_duck_dask_array(self._data) else None + + def __dask_keys__(self): + return self._data.__dask_keys__() + + def __dask_layers__(self): + return self._data.__dask_layers__() + + @property + def __dask_optimize__(self): + return self._data.__dask_optimize__ + + @property + def __dask_scheduler__(self): + return self._data.__dask_scheduler__ + + def __dask_postcompute__(self): + array_func, array_args = self._data.__dask_postcompute__() + return self._dask_finalize, (array_func,) + array_args + + def __dask_postpersist__(self): + array_func, array_args = self._data.__dask_postpersist__() + return self._dask_finalize, (array_func,) + array_args + + def _dask_finalize(self, results, array_func, *args, **kwargs): + data = array_func(results, *args, **kwargs) + return NamedArray(self._dims, data, attrs=self._attrs) + + @property + def chunks(self) -> tuple[tuple[int, ...], ...] | None: + """ + Tuple of block lengths for this dataarray's data, in order of dimensions, or None if + the underlying data is not a dask array. + + See Also + -------- + Variable.chunk + Variable.chunksizes + xarray.unify_chunks + """ + return getattr(self._data, "chunks", None) + + @property + def chunksizes(self) -> typing.Mapping[typing.Any, tuple[int, ...]]: + """ + Mapping from dimension names to block lengths for this variable's data, or None if + the underlying data is not a dask array. + Cannot be modified directly, but can be modified by calling .chunk(). + + Differs from variable.chunks because it returns a mapping of dimensions to chunk shapes + instead of a tuple of chunk shapes. + + See Also + -------- + Variable.chunk + Variable.chunks + xarray.unify_chunks + """ + if hasattr(self._data, "chunks"): + return Frozen(dict(zip(self.dims, self.data.chunks))) + else: + return {} + + def _replace(self, dims=_default, data=_default, attrs=_default): + if dims is _default: + dims = copy.copy(self._dims) + if data is _default: + data = copy.copy(self._data) + if attrs is _default: + attrs = copy.copy(self._attrs) + return type(self)(dims, data, attrs) + + def _copy( + self, + deep: bool = True, + data: npt.ArrayLike | None = None, + memo: dict[int, typing.Any] | None = None, + ): + if data is None: + ndata = self._data + + if deep: + ndata = copy.deepcopy(ndata, memo=memo) + else: + ndata = data + if self.shape != ndata.shape: + raise ValueError( + f"Data shape {ndata.shape} must match shape of object {self.shape}" + ) + + attrs = ( + copy.deepcopy(self._attrs, memo=memo) if deep else copy.copy(self._attrs) + ) + + return self._replace(data=ndata, attrs=attrs) + + def __copy__(self): + return self._copy(deep=False) + + def __deepcopy__(self, memo: dict[int, typing.Any] | None = None): + return self._copy(deep=True, memo=memo) + + def copy(self, deep: bool = True, data: npt.ArrayLike | None = None): + """Returns a copy of this object. + + If `deep=True`, the data array is loaded into memory and copied onto + the new object. Dimensions, attributes and encodings are always copied. + + Use `data` to create a new object with the same structure as + original but entirely new data. + + Parameters + ---------- + deep : bool, default: True + Whether the data array is loaded into memory and copied onto + the new object. Default is True. + data : array_like, optional + Data to use in the new object. Must have same shape as original. + When `data` is used, `deep` is ignored. + + Returns + ------- + object : Variable + New object with dimensions, attributes, encodings, and optionally + data copied from original. + + Examples + -------- + Shallow copy versus deep copy + + >>> var = xr.Variable(data=[1, 2, 3], dims="x") + >>> var.copy() + + array([1, 2, 3]) + >>> var_0 = var.copy(deep=False) + >>> var_0[0] = 7 + >>> var_0 + + array([7, 2, 3]) + >>> var + + array([7, 2, 3]) + + Changing the data using the ``data`` argument maintains the + structure of the original object, but with the new data. Original + object is unaffected. + + >>> var.copy(data=[0.1, 0.2, 0.3]) + + array([0.1, 0.2, 0.3]) + >>> var + + array([7, 2, 3]) + + See Also + -------- + pandas.DataFrame.copy + """ + return self._copy(deep=deep, data=data) + + # mutable objects should not be hashable + # https://github.com/python/mypy/issues/4266 + __hash__ = None # type: ignore[assignment] + + _array_counter = itertools.count() From 27910dcf01ec3bc087a14fc4dfa1c9c6495e801c Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 16 Aug 2023 16:09:27 -0700 Subject: [PATCH 02/67] move NDArrayMixin and NdimSizeLenMixin inside named_array --- xarray/backends/common.py | 3 +- xarray/core/indexing.py | 2 +- xarray/core/utils.py | 64 ----------------------------------- xarray/core/variable.py | 25 +------------- xarray/named_array/core.py | 25 +++++++++++++- xarray/named_array/utils.py | 67 +++++++++++++++++++++++++++++++++++++ xarray/tests/__init__.py | 6 ++-- 7 files changed, 98 insertions(+), 94 deletions(-) create mode 100644 xarray/named_array/utils.py diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 1ac988c6b4f..a74481382e0 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -14,7 +14,8 @@ from xarray.core import indexing from xarray.core.parallelcompat import get_chunked_array_type from xarray.core.pycompat import is_chunked_array -from xarray.core.utils import FrozenDict, NdimSizeLenMixin, is_remote_uri +from xarray.core.utils import FrozenDict, is_remote_uri +from xarray.named_array.utils import NdimSizeLenMixin if TYPE_CHECKING: from io import BufferedIOBase diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index acab9ccc60b..1439ab89290 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -26,12 +26,12 @@ ) from xarray.core.types import T_Xarray from xarray.core.utils import ( - NDArrayMixin, either_dict_or_kwargs, get_valid_numpy_dtype, is_scalar, to_0d_array, ) +from xarray.named_array.utils import NDArrayMixin if TYPE_CHECKING: from numpy.typing import DTypeLike diff --git a/xarray/core/utils.py b/xarray/core/utils.py index bd0ca57f33c..74e4ac3b29a 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -41,7 +41,6 @@ import inspect import io import itertools -import math import os import re import sys @@ -542,69 +541,6 @@ def __repr__(self) -> str: return f"{type(self).__name__}({list(self)!r})" -class NdimSizeLenMixin: - """Mixin class that extends a class that defines a ``shape`` property to - one that also defines ``ndim``, ``size`` and ``__len__``. - """ - - __slots__ = () - - @property - def ndim(self: Any) -> int: - """ - Number of array dimensions. - - See Also - -------- - numpy.ndarray.ndim - """ - return len(self.shape) - - @property - def size(self: Any) -> int: - """ - Number of elements in the array. - - Equal to ``np.prod(a.shape)``, i.e., the product of the array’s dimensions. - - See Also - -------- - numpy.ndarray.size - """ - return math.prod(self.shape) - - def __len__(self: Any) -> int: - try: - return self.shape[0] - except IndexError: - raise TypeError("len() of unsized object") - - -class NDArrayMixin(NdimSizeLenMixin): - """Mixin class for making wrappers of N-dimensional arrays that conform to - the ndarray interface required for the data argument to Variable objects. - - A subclass should set the `array` property and override one or more of - `dtype`, `shape` and `__getitem__`. - """ - - __slots__ = () - - @property - def dtype(self: Any) -> np.dtype: - return self.array.dtype - - @property - def shape(self: Any) -> tuple[int, ...]: - return self.array.shape - - def __getitem__(self: Any, key): - return self.array[key] - - def __repr__(self: Any) -> str: - return f"{type(self).__name__}(array={self.array!r})" - - class ReprObject: """Object that prints as the given value, for use with sentinel values.""" diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 0581bb93121..ba523dc49c3 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -34,7 +34,6 @@ is_duck_dask_array, ) from xarray.core.utils import ( - NdimSizeLenMixin, OrderedSet, _default, decode_numpy_dict_values, @@ -308,7 +307,7 @@ def _as_array_or_item(data): return data -class Variable(NamedArray, AbstractArray, NdimSizeLenMixin, VariableArithmetic): +class Variable(NamedArray, AbstractArray, VariableArithmetic): """A netcdf-like variable consisting of dimensions, data and attributes which describe a single Array. A single Variable object is not fully described outside the context of its parent Dataset (if you want such a @@ -2415,28 +2414,6 @@ def notnull(self, keep_attrs: bool | None = None): keep_attrs=keep_attrs, ) - @property - def real(self): - """ - The real part of the variable. - - See Also - -------- - numpy.ndarray.real - """ - return self._replace(data=self.data.real) - - @property - def imag(self): - """ - The imaginary part of the variable. - - See Also - -------- - numpy.ndarray.imag - """ - return self._replace(data=self.data.imag) - def __array_wrap__(self, obj, context=None): return Variable(self.dims, obj) diff --git a/xarray/named_array/core.py b/xarray/named_array/core.py index 28e392f3d4d..7bf239a9db0 100644 --- a/xarray/named_array/core.py +++ b/xarray/named_array/core.py @@ -7,9 +7,10 @@ from xarray.core.pycompat import is_duck_dask_array from xarray.core.utils import Frozen, _default +from xarray.named_array.utils import NdimSizeLenMixin -class NamedArray: +class NamedArray(NdimSizeLenMixin): __slots__ = ("_dims", "_data", "_attrs") def __init__( @@ -110,6 +111,28 @@ def data(self, data): ) self._data = data + @property + def real(self): + """ + The real part of the variable. + + See Also + -------- + numpy.ndarray.real + """ + return self._replace(data=self.data.real) + + @property + def imag(self): + """ + The imaginary part of the variable. + + See Also + -------- + numpy.ndarray.imag + """ + return self._replace(data=self.data.imag) + def __dask_tokenize__(self): # Use v.data, instead of v._data, in order to cope with the wrappers # around NetCDF and the like diff --git a/xarray/named_array/utils.py b/xarray/named_array/utils.py new file mode 100644 index 00000000000..b0c39658294 --- /dev/null +++ b/xarray/named_array/utils.py @@ -0,0 +1,67 @@ +import math +import typing + +import numpy as np + + +class NdimSizeLenMixin: + """Mixin class that extends a class that defines a ``shape`` property to + one that also defines ``ndim``, ``size`` and ``__len__``. + """ + + __slots__ = () + + @property + def ndim(self: typing.Any) -> int: + """ + Number of array dimensions. + + See Also + -------- + numpy.ndarray.ndim + """ + return len(self.shape) + + @property + def size(self: typing.Any) -> int: + """ + Number of elements in the array. + + Equal to ``np.prod(a.shape)``, i.e., the product of the array’s dimensions. + + See Also + -------- + numpy.ndarray.size + """ + return math.prod(self.shape) + + def __len__(self: typing.Any) -> int: + try: + return self.shape[0] + except IndexError: + raise TypeError("len() of unsized object") + + +class NDArrayMixin(NdimSizeLenMixin): + """Mixin class for making wrappers of N-dimensional arrays that conform to + the ndarray interface required for the data argument to Variable objects. + + A subclass should set the `array` property and override one or more of + `dtype`, `shape` and `__getitem__`. + """ + + __slots__ = () + + @property + def dtype(self: typing.Any) -> np.dtype: + return self.array.dtype + + @property + def shape(self: typing.Any) -> tuple[int, ...]: + return self.array.shape + + def __getitem__(self: typing.Any, key): + return self.array[key] + + def __repr__(self: typing.Any) -> str: + return f"{type(self).__name__}(array={self.array!r})" diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 7e1b964ecba..dd043f9b5ac 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -15,10 +15,10 @@ import xarray.testing from xarray import Dataset -from xarray.core import utils from xarray.core.duck_array_ops import allclose_or_equiv # noqa: F401 from xarray.core.indexing import ExplicitlyIndexed from xarray.core.options import set_options +from xarray.named_array.utils import NDArrayMixin from xarray.testing import ( # noqa: F401 assert_chunks_equal, assert_duckarray_allclose, @@ -138,7 +138,7 @@ class UnexpectedDataAccess(Exception): pass -class InaccessibleArray(utils.NDArrayMixin, ExplicitlyIndexed): +class InaccessibleArray(NDArrayMixin, ExplicitlyIndexed): """Disallows any loading.""" def __init__(self, array): @@ -162,7 +162,7 @@ def __getitem__(self, key): return self.array[tuple_idxr] -class DuckArrayWrapper(utils.NDArrayMixin): +class DuckArrayWrapper(NDArrayMixin): """Array-like that prevents casting to array. Modeled after cupy.""" From 1a02dac5f227d44d22e64a54bd998bf207f9ea7f Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 16 Aug 2023 16:49:37 -0700 Subject: [PATCH 03/67] vendor is_duck_dask_array --- xarray/named_array/core.py | 3 +-- xarray/named_array/utils.py | 45 +++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/xarray/named_array/core.py b/xarray/named_array/core.py index 7bf239a9db0..964ec432b36 100644 --- a/xarray/named_array/core.py +++ b/xarray/named_array/core.py @@ -5,9 +5,8 @@ import numpy as np import numpy.typing as npt -from xarray.core.pycompat import is_duck_dask_array from xarray.core.utils import Frozen, _default -from xarray.named_array.utils import NdimSizeLenMixin +from xarray.named_array.utils import NdimSizeLenMixin, is_duck_dask_array class NamedArray(NdimSizeLenMixin): diff --git a/xarray/named_array/utils.py b/xarray/named_array/utils.py index b0c39658294..d8e21bcc207 100644 --- a/xarray/named_array/utils.py +++ b/xarray/named_array/utils.py @@ -1,3 +1,4 @@ +import importlib import math import typing @@ -65,3 +66,47 @@ def __getitem__(self: typing.Any, key): def __repr__(self: typing.Any) -> str: return f"{type(self).__name__}(array={self.array!r})" + + +def module_available(module: str) -> bool: + """Checks whether a module is installed without importing it. + + Use this for a lightweight check and lazy imports. + + Parameters + ---------- + module : str + Name of the module. + + Returns + ------- + available : bool + Whether the module is installed. + """ + return importlib.util.find_spec(module) is not None + + +def is_dask_collection(x): + if module_available("dask"): + from dask.base import is_dask_collection + + return is_dask_collection(x) + return False + + +def is_duck_array(value: typing.Any) -> bool: + if isinstance(value, np.ndarray): + return True + return ( + hasattr(value, "ndim") + and hasattr(value, "shape") + and hasattr(value, "dtype") + and ( + (hasattr(value, "__array_function__") and hasattr(value, "__array_ufunc__")) + or hasattr(value, "__array_namespace__") + ) + ) + + +def is_duck_dask_array(x): + return is_duck_array(x) and is_dask_collection(x) From 636b156b1166fd79b8e54f218e60cd6d44f46f75 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 16 Aug 2023 16:52:47 -0700 Subject: [PATCH 04/67] vendor Frozen object --- xarray/named_array/core.py | 4 ++-- xarray/named_array/utils.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/xarray/named_array/core.py b/xarray/named_array/core.py index 964ec432b36..0e497cd27d0 100644 --- a/xarray/named_array/core.py +++ b/xarray/named_array/core.py @@ -5,8 +5,8 @@ import numpy as np import numpy.typing as npt -from xarray.core.utils import Frozen, _default -from xarray.named_array.utils import NdimSizeLenMixin, is_duck_dask_array +from xarray.core.utils import _default +from xarray.named_array.utils import Frozen, NdimSizeLenMixin, is_duck_dask_array class NamedArray(NdimSizeLenMixin): diff --git a/xarray/named_array/utils.py b/xarray/named_array/utils.py index d8e21bcc207..64fcaf3dc53 100644 --- a/xarray/named_array/utils.py +++ b/xarray/named_array/utils.py @@ -1,9 +1,14 @@ import importlib import math import typing +from collections.abc import Iterator, Mapping import numpy as np +K = typing.TypeVar("K") +V = typing.TypeVar("V") +T = typing.TypeVar("T") + class NdimSizeLenMixin: """Mixin class that extends a class that defines a ``shape`` property to @@ -68,6 +73,33 @@ def __repr__(self: typing.Any) -> str: return f"{type(self).__name__}(array={self.array!r})" +class Frozen(Mapping[K, V]): + """Wrapper around an object implementing the mapping interface to make it + immutable. If you really want to modify the mapping, the mutable version is + saved under the `mapping` attribute. + """ + + __slots__ = ("mapping",) + + def __init__(self, mapping: Mapping[K, V]): + self.mapping = mapping + + def __getitem__(self, key: K) -> V: + return self.mapping[key] + + def __iter__(self) -> Iterator[K]: + return iter(self.mapping) + + def __len__(self) -> int: + return len(self.mapping) + + def __contains__(self, key: object) -> bool: + return key in self.mapping + + def __repr__(self) -> str: + return f"{type(self).__name__}({self.mapping!r})" + + def module_available(module: str) -> bool: """Checks whether a module is installed without importing it. From 9ba6c84a790eab40f430f815b319bc38dd0ee74f Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 16 Aug 2023 19:14:42 -0700 Subject: [PATCH 05/67] update import --- xarray/tests/test_variable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index e1a2d9c8922..37f66d811fc 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -26,8 +26,8 @@ VectorizedIndexer, ) from xarray.core.pycompat import array_type -from xarray.core.utils import NDArrayMixin from xarray.core.variable import as_compatible_data, as_variable +from xarray.named_array.utils import NDArrayMixin from xarray.tests import ( assert_allclose, assert_array_equal, From b1a1de05115e0a01bf0d45e871c03ad3da58286b Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 16 Aug 2023 19:28:16 -0700 Subject: [PATCH 06/67] move _default sentinel value --- xarray/core/alignment.py | 2 +- xarray/core/dataarray.py | 3 +-- xarray/core/dataset.py | 3 +-- xarray/core/utils.py | 9 --------- xarray/core/variable.py | 2 +- xarray/named_array/core.py | 8 ++++++-- xarray/named_array/utils.py | 9 +++++++++ 7 files changed, 19 insertions(+), 17 deletions(-) diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 39ff878b56d..d2bbc459d83 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -839,7 +839,7 @@ def is_alignable(obj): elif raise_on_invalid: raise ValueError( "object to align is neither an xarray.Dataset, " - "an xarray.DataArray nor a dictionary: {!r}".format(variables) + f"an xarray.DataArray nor a dictionary: {variables!r}" ) else: out.append(variables) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index df57ad898e4..9975c1e65e0 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -42,10 +42,8 @@ from xarray.core.merge import PANDAS_TYPES, MergeError from xarray.core.options import OPTIONS, _get_keep_attrs from xarray.core.utils import ( - Default, HybridMappingProxy, ReprObject, - _default, either_dict_or_kwargs, emit_user_level_warning, ) @@ -55,6 +53,7 @@ as_compatible_data, as_variable, ) +from xarray.named_array.utils import Default, _default from xarray.plot.accessor import DataArrayPlotAccessor from xarray.plot.utils import _get_units_from_attrs diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index bdf2d8babe1..a10b0390878 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -91,11 +91,9 @@ ) from xarray.core.types import QuantileMethods, T_Dataset from xarray.core.utils import ( - Default, Frozen, HybridMappingProxy, OrderedSet, - _default, decode_numpy_dict_values, drop_dims_from_indexers, either_dict_or_kwargs, @@ -111,6 +109,7 @@ broadcast_variables, calculate_dimensions, ) +from xarray.named_array.utils import Default, _default from xarray.plot.accessor import DatasetPlotAccessor if TYPE_CHECKING: diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 74e4ac3b29a..54367a9da26 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -56,7 +56,6 @@ MutableSet, Sequence, ) -from enum import Enum from typing import ( TYPE_CHECKING, Any, @@ -1044,14 +1043,6 @@ def __get__(self, obj: None | object, cls) -> type[_Accessor] | _Accessor: return self._accessor(obj) # type: ignore # assume it is a valid accessor! -# Singleton type, as per https://github.com/python/typing/pull/240 -class Default(Enum): - token = 0 - - -_default = Default.token - - def iterate_nested(nested_list): for item in nested_list: if isinstance(item, list): diff --git a/xarray/core/variable.py b/xarray/core/variable.py index ba523dc49c3..ddd529462f0 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -35,7 +35,6 @@ ) from xarray.core.utils import ( OrderedSet, - _default, decode_numpy_dict_values, drop_dims_from_indexers, either_dict_or_kwargs, @@ -45,6 +44,7 @@ maybe_coerce_to_str, ) from xarray.named_array.core import NamedArray +from xarray.named_array.utils import _default NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( indexing.ExplicitlyIndexed, diff --git a/xarray/named_array/core.py b/xarray/named_array/core.py index 0e497cd27d0..4f07c5d1621 100644 --- a/xarray/named_array/core.py +++ b/xarray/named_array/core.py @@ -5,8 +5,12 @@ import numpy as np import numpy.typing as npt -from xarray.core.utils import _default -from xarray.named_array.utils import Frozen, NdimSizeLenMixin, is_duck_dask_array +from xarray.named_array.utils import ( + Frozen, + NdimSizeLenMixin, + _default, + is_duck_dask_array, +) class NamedArray(NdimSizeLenMixin): diff --git a/xarray/named_array/utils.py b/xarray/named_array/utils.py index 64fcaf3dc53..6d0bfaa6673 100644 --- a/xarray/named_array/utils.py +++ b/xarray/named_array/utils.py @@ -1,3 +1,4 @@ +import enum import importlib import math import typing @@ -10,6 +11,14 @@ T = typing.TypeVar("T") +# Singleton type, as per https://github.com/python/typing/pull/240 +class Default(enum.Enum): + token = 0 + + +_default = Default.token + + class NdimSizeLenMixin: """Mixin class that extends a class that defines a ``shape`` property to one that also defines ``ndim``, ``size`` and ``__len__``. From 1e11e87fa2bb84d83319d8a4630808751a4c4839 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 17 Aug 2023 09:27:03 -0700 Subject: [PATCH 07/67] rename subpackage to namedarray per @TomNicholas suggestion --- xarray/backends/common.py | 2 +- xarray/core/dataarray.py | 2 +- xarray/core/dataset.py | 2 +- xarray/core/indexing.py | 2 +- xarray/core/variable.py | 4 ++-- xarray/{named_array => namedarray}/__init__.py | 0 xarray/{named_array => namedarray}/core.py | 2 +- xarray/{named_array => namedarray}/utils.py | 0 xarray/tests/__init__.py | 2 +- xarray/tests/test_variable.py | 2 +- 10 files changed, 9 insertions(+), 9 deletions(-) rename xarray/{named_array => namedarray}/__init__.py (100%) rename xarray/{named_array => namedarray}/core.py (99%) rename xarray/{named_array => namedarray}/utils.py (100%) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index a74481382e0..1893df52343 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -15,7 +15,7 @@ from xarray.core.parallelcompat import get_chunked_array_type from xarray.core.pycompat import is_chunked_array from xarray.core.utils import FrozenDict, is_remote_uri -from xarray.named_array.utils import NdimSizeLenMixin +from xarray.namedarray.utils import NdimSizeLenMixin if TYPE_CHECKING: from io import BufferedIOBase diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 9975c1e65e0..34b9178b381 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -53,7 +53,7 @@ as_compatible_data, as_variable, ) -from xarray.named_array.utils import Default, _default +from xarray.namedarray.utils import Default, _default from xarray.plot.accessor import DataArrayPlotAccessor from xarray.plot.utils import _get_units_from_attrs diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index a10b0390878..e0067950297 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -109,7 +109,7 @@ broadcast_variables, calculate_dimensions, ) -from xarray.named_array.utils import Default, _default +from xarray.namedarray.utils import Default, _default from xarray.plot.accessor import DatasetPlotAccessor if TYPE_CHECKING: diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 1439ab89290..550bfc0c6fb 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -31,7 +31,7 @@ is_scalar, to_0d_array, ) -from xarray.named_array.utils import NDArrayMixin +from xarray.namedarray.utils import NDArrayMixin if TYPE_CHECKING: from numpy.typing import DTypeLike diff --git a/xarray/core/variable.py b/xarray/core/variable.py index ddd529462f0..dbfc0283d4f 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -43,8 +43,8 @@ is_duck_array, maybe_coerce_to_str, ) -from xarray.named_array.core import NamedArray -from xarray.named_array.utils import _default +from xarray.namedarray.core import NamedArray +from xarray.namedarray.utils import _default NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( indexing.ExplicitlyIndexed, diff --git a/xarray/named_array/__init__.py b/xarray/namedarray/__init__.py similarity index 100% rename from xarray/named_array/__init__.py rename to xarray/namedarray/__init__.py diff --git a/xarray/named_array/core.py b/xarray/namedarray/core.py similarity index 99% rename from xarray/named_array/core.py rename to xarray/namedarray/core.py index 4f07c5d1621..9053d051c79 100644 --- a/xarray/named_array/core.py +++ b/xarray/namedarray/core.py @@ -5,7 +5,7 @@ import numpy as np import numpy.typing as npt -from xarray.named_array.utils import ( +from xarray.namedarray.utils import ( Frozen, NdimSizeLenMixin, _default, diff --git a/xarray/named_array/utils.py b/xarray/namedarray/utils.py similarity index 100% rename from xarray/named_array/utils.py rename to xarray/namedarray/utils.py diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index dd043f9b5ac..53c4ad9581e 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -18,7 +18,7 @@ from xarray.core.duck_array_ops import allclose_or_equiv # noqa: F401 from xarray.core.indexing import ExplicitlyIndexed from xarray.core.options import set_options -from xarray.named_array.utils import NDArrayMixin +from xarray.namedarray.utils import NDArrayMixin from xarray.testing import ( # noqa: F401 assert_chunks_equal, assert_duckarray_allclose, diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 37f66d811fc..e2533690a86 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -27,7 +27,7 @@ ) from xarray.core.pycompat import array_type from xarray.core.variable import as_compatible_data, as_variable -from xarray.named_array.utils import NDArrayMixin +from xarray.namedarray.utils import NDArrayMixin from xarray.tests import ( assert_allclose, assert_array_equal, From ad364f06132085c34e928cb2f73c5c51f6632a54 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 17 Aug 2023 14:00:24 -0700 Subject: [PATCH 08/67] Remove NdimSizeLenMixin --- xarray/backends/common.py | 34 ++++++++++++++++++++++++++++++-- xarray/namedarray/core.py | 40 +++++++++++++++++++++++++++++++------- xarray/namedarray/utils.py | 20 ++++++------------- 3 files changed, 71 insertions(+), 23 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 1893df52343..2ce9f68c5f7 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -1,6 +1,7 @@ from __future__ import annotations import logging +import math import os import time import traceback @@ -15,7 +16,6 @@ from xarray.core.parallelcompat import get_chunked_array_type from xarray.core.pycompat import is_chunked_array from xarray.core.utils import FrozenDict, is_remote_uri -from xarray.namedarray.utils import NdimSizeLenMixin if TYPE_CHECKING: from io import BufferedIOBase @@ -163,9 +163,39 @@ def robust_getitem(array, key, catch=Exception, max_retries=6, initial_delay=500 time.sleep(1e-3 * next_delay) -class BackendArray(NdimSizeLenMixin, indexing.ExplicitlyIndexed): +class BackendArray(indexing.ExplicitlyIndexed): __slots__ = () + @property + def ndim(self: Any) -> int: + """ + Number of array dimensions. + + See Also + -------- + numpy.ndarray.ndim + """ + return len(self.shape) + + @property + def size(self: Any) -> int: + """ + Number of elements in the array. + + Equal to ``np.prod(a.shape)``, i.e., the product of the array’s dimensions. + + See Also + -------- + numpy.ndarray.size + """ + return math.prod(self.shape) + + def __len__(self: Any) -> int: + try: + return self.shape[0] + except IndexError: + raise TypeError("len() of unsized object") + def get_duck_array(self, dtype: np.typing.DTypeLike = None): key = indexing.BasicIndexer((slice(None),) * self.ndim) return self[key] # type: ignore [index] diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 9053d051c79..de56a8e6183 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -1,19 +1,15 @@ import copy import itertools +import math import typing import numpy as np import numpy.typing as npt -from xarray.namedarray.utils import ( - Frozen, - NdimSizeLenMixin, - _default, - is_duck_dask_array, -) +from xarray.namedarray.utils import Frozen, _default, is_duck_dask_array -class NamedArray(NdimSizeLenMixin): +class NamedArray: __slots__ = ("_dims", "_data", "_attrs") def __init__( @@ -23,6 +19,36 @@ def __init__( self._data = data self._attrs = attrs or {} + @property + def ndim(self: typing.Any) -> int: + """ + Number of array dimensions. + + See Also + -------- + numpy.ndarray.ndim + """ + return len(self.shape) + + @property + def size(self: typing.Any) -> int: + """ + Number of elements in the array. + + Equal to ``np.prod(a.shape)``, i.e., the product of the array’s dimensions. + + See Also + -------- + numpy.ndarray.size + """ + return math.prod(self.shape) + + def __len__(self: typing.Any) -> int: + try: + return self.shape[0] + except IndexError: + raise TypeError("len() of unsized object") + @property def dtype(self) -> np.dtype: """ diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py index 6d0bfaa6673..7cff0d34b13 100644 --- a/xarray/namedarray/utils.py +++ b/xarray/namedarray/utils.py @@ -19,9 +19,12 @@ class Default(enum.Enum): _default = Default.token -class NdimSizeLenMixin: - """Mixin class that extends a class that defines a ``shape`` property to - one that also defines ``ndim``, ``size`` and ``__len__``. +class NDArrayMixin: + """Mixin class for making wrappers of N-dimensional arrays that conform to + the ndarray interface required for the data argument to Variable objects. + + A subclass should set the `array` property and override one or more of + `dtype`, `shape` and `__getitem__`. """ __slots__ = () @@ -56,17 +59,6 @@ def __len__(self: typing.Any) -> int: except IndexError: raise TypeError("len() of unsized object") - -class NDArrayMixin(NdimSizeLenMixin): - """Mixin class for making wrappers of N-dimensional arrays that conform to - the ndarray interface required for the data argument to Variable objects. - - A subclass should set the `array` property and override one or more of - `dtype`, `shape` and `__getitem__`. - """ - - __slots__ = () - @property def dtype(self: typing.Any) -> np.dtype: return self.array.dtype From d1e8d2a0a3916e1343e758ca61a1003e7cd57a57 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 17 Aug 2023 15:18:26 -0700 Subject: [PATCH 09/67] fix typing --- xarray/namedarray/core.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index de56a8e6183..4f5f2b90ee5 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -2,6 +2,7 @@ import itertools import math import typing +from collections.abc import Hashable, Iterable, Mapping import numpy as np import numpy.typing as npt @@ -86,17 +87,15 @@ def nbytes(self) -> int: return self.size * self.dtype.itemsize @property - def dims(self) -> tuple[typing.Hashable, ...]: + def dims(self) -> tuple[Hashable, ...]: """Tuple of dimension names with which this variable is associated.""" return self._dims @dims.setter - def dims(self, value: str | typing.Iterable[typing.Hashable]) -> None: + def dims(self, value: str | Iterable[Hashable]) -> None: self._dims = self._parse_dimensions(value) - def _parse_dimensions( - self, dims: str | typing.Iterable[typing.Hashable] - ) -> tuple[typing.Hashable, ...]: + def _parse_dimensions(self, dims: str | Iterable[Hashable]) -> tuple[Hashable, ...]: dims = (dims,) if isinstance(dims, str) else tuple(dims) if len(dims) != self.ndim: raise ValueError( @@ -113,7 +112,7 @@ def attrs(self) -> dict[typing.Any, typing.Any]: return self._attrs @attrs.setter - def attrs(self, value: typing.Mapping[typing.Any, typing.Any]) -> None: + def attrs(self, value: Mapping[typing.Any, typing.Any]) -> None: self._attrs = dict(value) @property From 098eb0cd34fdfbaa61ea322180666d2fc5e80601 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 17 Aug 2023 15:26:29 -0700 Subject: [PATCH 10/67] add annotations --- xarray/namedarray/core.py | 2 ++ xarray/namedarray/utils.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 4f5f2b90ee5..9294ed5a7fc 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import copy import itertools import math diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py index 7cff0d34b13..5f9c90fe4a2 100644 --- a/xarray/namedarray/utils.py +++ b/xarray/namedarray/utils.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import enum import importlib import math From 38c105a25e63260c16e5fb4f0a3702ce8220c479 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 17 Aug 2023 15:59:38 -0700 Subject: [PATCH 11/67] Remove NDArrayMixin --- xarray/core/indexing.py | 93 +++++++++++++++++++++++++++++++++-- xarray/namedarray/utils.py | 56 --------------------- xarray/tests/__init__.py | 56 +++++++++++++++++++-- xarray/tests/test_variable.py | 49 +++++++++++++++++- 4 files changed, 189 insertions(+), 65 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 550bfc0c6fb..635da960820 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -2,6 +2,7 @@ import enum import functools +import math import operator from collections import Counter, defaultdict from collections.abc import Hashable, Iterable, Mapping @@ -31,7 +32,6 @@ is_scalar, to_0d_array, ) -from xarray.namedarray.utils import NDArrayMixin if TYPE_CHECKING: from numpy.typing import DTypeLike @@ -458,9 +458,53 @@ def get_duck_array(self): return self.array -class ExplicitlyIndexedNDArrayMixin(NDArrayMixin, ExplicitlyIndexed): +class ExplicitlyIndexedNDArrayMixin(ExplicitlyIndexed): __slots__ = () + @property + def ndim(self: Any) -> int: + """ + Number of array dimensions. + + See Also + -------- + numpy.ndarray.ndim + """ + return len(self.shape) + + @property + def size(self: Any) -> int: + """ + Number of elements in the array. + + Equal to ``np.prod(a.shape)``, i.e., the product of the array’s dimensions. + + See Also + -------- + numpy.ndarray.size + """ + return math.prod(self.shape) + + def __len__(self: Any) -> int: + try: + return self.shape[0] + except IndexError: + raise TypeError("len() of unsized object") + + @property + def dtype(self: Any) -> np.dtype: + return self.array.dtype + + @property + def shape(self: Any) -> tuple[int, ...]: + return self.array.shape + + def __getitem__(self: Any, key): + return self.array[key] + + def __repr__(self: Any) -> str: + return f"{type(self).__name__}(array={self.array!r})" + def get_duck_array(self): key = BasicIndexer((slice(None),) * self.ndim) return self[key] @@ -471,7 +515,7 @@ def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray: return np.asarray(self.get_duck_array(), dtype=dtype) -class ImplicitToExplicitIndexingAdapter(NDArrayMixin): +class ImplicitToExplicitIndexingAdapter: """Wrap an array, converting tuples into the indicated explicit indexer.""" __slots__ = ("array", "indexer_cls") @@ -483,6 +527,47 @@ def __init__(self, array, indexer_cls=BasicIndexer): def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray: return np.asarray(self.get_duck_array(), dtype=dtype) + @property + def ndim(self: Any) -> int: + """ + Number of array dimensions. + + See Also + -------- + numpy.ndarray.ndim + """ + return len(self.shape) + + @property + def size(self: Any) -> int: + """ + Number of elements in the array. + + Equal to ``np.prod(a.shape)``, i.e., the product of the array’s dimensions. + + See Also + -------- + numpy.ndarray.size + """ + return math.prod(self.shape) + + def __len__(self: Any) -> int: + try: + return self.shape[0] + except IndexError: + raise TypeError("len() of unsized object") + + @property + def dtype(self: Any) -> np.dtype: + return self.array.dtype + + @property + def shape(self: Any) -> tuple[int, ...]: + return self.array.shape + + def __repr__(self: Any) -> str: + return f"{type(self).__name__}(array={self.array!r})" + def get_duck_array(self): return self.array.get_duck_array() @@ -1303,7 +1388,7 @@ def __init__(self, array): if not isinstance(array, np.ndarray): raise TypeError( "NumpyIndexingAdapter only wraps np.ndarray. " - "Trying to wrap {}".format(type(array)) + f"Trying to wrap {type(array)}" ) self.array = array diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py index 5f9c90fe4a2..55eb766d986 100644 --- a/xarray/namedarray/utils.py +++ b/xarray/namedarray/utils.py @@ -2,7 +2,6 @@ import enum import importlib -import math import typing from collections.abc import Iterator, Mapping @@ -21,61 +20,6 @@ class Default(enum.Enum): _default = Default.token -class NDArrayMixin: - """Mixin class for making wrappers of N-dimensional arrays that conform to - the ndarray interface required for the data argument to Variable objects. - - A subclass should set the `array` property and override one or more of - `dtype`, `shape` and `__getitem__`. - """ - - __slots__ = () - - @property - def ndim(self: typing.Any) -> int: - """ - Number of array dimensions. - - See Also - -------- - numpy.ndarray.ndim - """ - return len(self.shape) - - @property - def size(self: typing.Any) -> int: - """ - Number of elements in the array. - - Equal to ``np.prod(a.shape)``, i.e., the product of the array’s dimensions. - - See Also - -------- - numpy.ndarray.size - """ - return math.prod(self.shape) - - def __len__(self: typing.Any) -> int: - try: - return self.shape[0] - except IndexError: - raise TypeError("len() of unsized object") - - @property - def dtype(self: typing.Any) -> np.dtype: - return self.array.dtype - - @property - def shape(self: typing.Any) -> tuple[int, ...]: - return self.array.shape - - def __getitem__(self: typing.Any, key): - return self.array[key] - - def __repr__(self: typing.Any) -> str: - return f"{type(self).__name__}(array={self.array!r})" - - class Frozen(Mapping[K, V]): """Wrapper around an object implementing the mapping interface to make it immutable. If you really want to modify the mapping, the mutable version is diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 53c4ad9581e..2ddd61aaedd 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -1,6 +1,7 @@ from __future__ import annotations import importlib +import math import platform import warnings from contextlib import contextmanager, nullcontext @@ -18,7 +19,6 @@ from xarray.core.duck_array_ops import allclose_or_equiv # noqa: F401 from xarray.core.indexing import ExplicitlyIndexed from xarray.core.options import set_options -from xarray.namedarray.utils import NDArrayMixin from xarray.testing import ( # noqa: F401 assert_chunks_equal, assert_duckarray_allclose, @@ -138,7 +138,7 @@ class UnexpectedDataAccess(Exception): pass -class InaccessibleArray(NDArrayMixin, ExplicitlyIndexed): +class InaccessibleArray(ExplicitlyIndexed): """Disallows any loading.""" def __init__(self, array): @@ -153,6 +153,31 @@ def __array__(self, dtype: np.typing.DTypeLike = None): def __getitem__(self, key): raise UnexpectedDataAccess("Tried accessing data.") + @property + def ndim(self) -> int: + return len(self.array.shape) + + @property + def size(self) -> int: + return math.prod(self.array.shape) + + @property + def dtype(self) -> np.dtype: + return self.array.dtype + + @property + def shape(self) -> tuple[int, ...]: + return self.array.shape + + def __len__(self) -> int: + try: + return self.array.shape[0] + except IndexError: + raise TypeError("len() of unsized object") + + def __repr__(self) -> str: + return f"{type(self).__name__}(array={self.array!r})" + class FirstElementAccessibleArray(InaccessibleArray): def __getitem__(self, key): @@ -162,7 +187,7 @@ def __getitem__(self, key): return self.array[tuple_idxr] -class DuckArrayWrapper(NDArrayMixin): +class DuckArrayWrapper: """Array-like that prevents casting to array. Modeled after cupy.""" @@ -178,6 +203,31 @@ def __array__(self, dtype: np.typing.DTypeLike = None): def __array_namespace__(self): """Present to satisfy is_duck_array test.""" + @property + def ndim(self) -> int: + return len(self.array.shape) + + @property + def size(self) -> int: + return math.prod(self.array.shape) + + @property + def dtype(self) -> np.dtype: + return self.array.dtype + + @property + def shape(self) -> tuple[int, ...]: + return self.array.shape + + def __len__(self) -> int: + try: + return self.array.shape[0] + except IndexError: + raise TypeError("len() of unsized object") + + def __repr__(self) -> str: + return f"{type(self).__name__}(array={self.array!r})" + class ReturnItem: def __getitem__(self, key): diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 97c2aeb281c..69796fda2de 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1,5 +1,7 @@ from __future__ import annotations +import math +import typing import warnings from abc import ABC, abstractmethod from copy import copy, deepcopy @@ -27,7 +29,6 @@ ) from xarray.core.pycompat import array_type from xarray.core.variable import as_compatible_data, as_variable -from xarray.namedarray.utils import NDArrayMixin from xarray.tests import ( assert_allclose, assert_array_equal, @@ -2692,10 +2693,54 @@ def test_ones_like(self) -> None: def test_unsupported_type(self): # Non indexable type - class CustomArray(NDArrayMixin): + class CustomArray: def __init__(self, array): self.array = array + @property + def ndim(self: typing.Any) -> int: + """ + Number of array dimensions. + + See Also + -------- + numpy.ndarray.ndim + """ + return len(self.shape) + + @property + def size(self: typing.Any) -> int: + """ + Number of elements in the array. + + Equal to ``np.prod(a.shape)``, i.e., the product of the array’s dimensions. + + See Also + -------- + numpy.ndarray.size + """ + return math.prod(self.shape) + + def __len__(self: typing.Any) -> int: + try: + return self.shape[0] + except IndexError: + raise TypeError("len() of unsized object") + + @property + def dtype(self: typing.Any) -> np.dtype: + return self.array.dtype + + @property + def shape(self: typing.Any) -> tuple[int, ...]: + return self.array.shape + + def __getitem__(self: typing.Any, key): + return self.array[key] + + def __repr__(self: typing.Any) -> str: + return f"{type(self).__name__}(array={self.array!r})" + class CustomIndexable(CustomArray, indexing.ExplicitlyIndexed): pass From 1fdd281c064ced109fc492959a4774d0fb277272 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com> Date: Fri, 18 Aug 2023 16:28:32 -0700 Subject: [PATCH 12/67] Apply suggestions from code review Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> --- xarray/namedarray/core.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 9294ed5a7fc..247a7ce3751 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -23,7 +23,7 @@ def __init__( self._attrs = attrs or {} @property - def ndim(self: typing.Any) -> int: + def ndim(self) -> int: """ Number of array dimensions. @@ -34,7 +34,7 @@ def ndim(self: typing.Any) -> int: return len(self.shape) @property - def size(self: typing.Any) -> int: + def size(self) -> int: """ Number of elements in the array. @@ -46,7 +46,7 @@ def size(self: typing.Any) -> int: """ return math.prod(self.shape) - def __len__(self: typing.Any) -> int: + def __len__(self) -> int: try: return self.shape[0] except IndexError: @@ -118,7 +118,7 @@ def attrs(self, value: Mapping[typing.Any, typing.Any]) -> None: self._attrs = dict(value) @property - def data(self) -> typing.Any: + def data(self) -> T_Array: """ The Variable's data as an array. The underlying array type (e.g. dask, sparse, pint) is preserved. From 33c22165fe90ba00288873d9cde88a8e03cfc40e Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Mon, 21 Aug 2023 11:18:39 -0700 Subject: [PATCH 13/67] fix typing --- xarray/namedarray/core.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 247a7ce3751..8ed414b08b7 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -11,6 +11,10 @@ from xarray.namedarray.utils import Frozen, _default, is_duck_dask_array +# temporary placeholder for indicating that an array api compliant +# type. hopefully in the future we can narrow this down more +T_Array = typing.TypeVar("T_Array", bound=typing.Any) + class NamedArray: __slots__ = ("_dims", "_data", "_attrs") @@ -132,13 +136,16 @@ def data(self) -> T_Array: return self._data - @data.setter - def data(self, data): - if data.shape != self.shape: + def _check_shape(self, new_data): + if new_data.shape != self.shape: raise ValueError( f"replacement data must match the Variable's shape. " - f"replacement data has shape {data.shape}; Variable has shape {self.shape}" + f"replacement data has shape {new_data.shape}; Variable has shape {self.shape}" ) + + @data.setter + def data(self, data): + self._check_shape(data) self._data = data @property @@ -256,10 +263,7 @@ def _copy( ndata = copy.deepcopy(ndata, memo=memo) else: ndata = data - if self.shape != ndata.shape: - raise ValueError( - f"Data shape {ndata.shape} must match shape of object {self.shape}" - ) + self._check_shape(ndata) attrs = ( copy.deepcopy(self._attrs, memo=memo) if deep else copy.copy(self._attrs) From 2c9223dc0a1c5e50af1c42f9741b512f4cf5e202 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Mon, 21 Aug 2023 14:31:57 -0700 Subject: [PATCH 14/67] fix return type --- xarray/namedarray/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 8ed414b08b7..0a36014f2fa 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -204,7 +204,7 @@ def __dask_postpersist__(self): def _dask_finalize(self, results, array_func, *args, **kwargs): data = array_func(results, *args, **kwargs) - return NamedArray(self._dims, data, attrs=self._attrs) + return type(self)(self._dims, data, attrs=self._attrs) @property def chunks(self) -> tuple[tuple[int, ...], ...] | None: From 0e4afe0df764b4c2d6afb20b400f055be56a13cd Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Mon, 21 Aug 2023 19:42:37 -0700 Subject: [PATCH 15/67] revert NDArrayMixin --- xarray/backends/common.py | 35 +------------- xarray/core/indexing.py | 91 ++--------------------------------- xarray/core/utils.py | 64 ++++++++++++++++++++++++ xarray/tests/__init__.py | 56 ++------------------- xarray/tests/test_variable.py | 49 +------------------ 5 files changed, 74 insertions(+), 221 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 2ce9f68c5f7..1ac988c6b4f 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -1,7 +1,6 @@ from __future__ import annotations import logging -import math import os import time import traceback @@ -15,7 +14,7 @@ from xarray.core import indexing from xarray.core.parallelcompat import get_chunked_array_type from xarray.core.pycompat import is_chunked_array -from xarray.core.utils import FrozenDict, is_remote_uri +from xarray.core.utils import FrozenDict, NdimSizeLenMixin, is_remote_uri if TYPE_CHECKING: from io import BufferedIOBase @@ -163,39 +162,9 @@ def robust_getitem(array, key, catch=Exception, max_retries=6, initial_delay=500 time.sleep(1e-3 * next_delay) -class BackendArray(indexing.ExplicitlyIndexed): +class BackendArray(NdimSizeLenMixin, indexing.ExplicitlyIndexed): __slots__ = () - @property - def ndim(self: Any) -> int: - """ - Number of array dimensions. - - See Also - -------- - numpy.ndarray.ndim - """ - return len(self.shape) - - @property - def size(self: Any) -> int: - """ - Number of elements in the array. - - Equal to ``np.prod(a.shape)``, i.e., the product of the array’s dimensions. - - See Also - -------- - numpy.ndarray.size - """ - return math.prod(self.shape) - - def __len__(self: Any) -> int: - try: - return self.shape[0] - except IndexError: - raise TypeError("len() of unsized object") - def get_duck_array(self, dtype: np.typing.DTypeLike = None): key = indexing.BasicIndexer((slice(None),) * self.ndim) return self[key] # type: ignore [index] diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 635da960820..7969ded3102 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -2,7 +2,6 @@ import enum import functools -import math import operator from collections import Counter, defaultdict from collections.abc import Hashable, Iterable, Mapping @@ -27,6 +26,7 @@ ) from xarray.core.types import T_Xarray from xarray.core.utils import ( + NDArrayMixin, either_dict_or_kwargs, get_valid_numpy_dtype, is_scalar, @@ -458,53 +458,9 @@ def get_duck_array(self): return self.array -class ExplicitlyIndexedNDArrayMixin(ExplicitlyIndexed): +class ExplicitlyIndexedNDArrayMixin(NDArrayMixin, ExplicitlyIndexed): __slots__ = () - @property - def ndim(self: Any) -> int: - """ - Number of array dimensions. - - See Also - -------- - numpy.ndarray.ndim - """ - return len(self.shape) - - @property - def size(self: Any) -> int: - """ - Number of elements in the array. - - Equal to ``np.prod(a.shape)``, i.e., the product of the array’s dimensions. - - See Also - -------- - numpy.ndarray.size - """ - return math.prod(self.shape) - - def __len__(self: Any) -> int: - try: - return self.shape[0] - except IndexError: - raise TypeError("len() of unsized object") - - @property - def dtype(self: Any) -> np.dtype: - return self.array.dtype - - @property - def shape(self: Any) -> tuple[int, ...]: - return self.array.shape - - def __getitem__(self: Any, key): - return self.array[key] - - def __repr__(self: Any) -> str: - return f"{type(self).__name__}(array={self.array!r})" - def get_duck_array(self): key = BasicIndexer((slice(None),) * self.ndim) return self[key] @@ -515,7 +471,7 @@ def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray: return np.asarray(self.get_duck_array(), dtype=dtype) -class ImplicitToExplicitIndexingAdapter: +class ImplicitToExplicitIndexingAdapter(NDArrayMixin): """Wrap an array, converting tuples into the indicated explicit indexer.""" __slots__ = ("array", "indexer_cls") @@ -527,47 +483,6 @@ def __init__(self, array, indexer_cls=BasicIndexer): def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray: return np.asarray(self.get_duck_array(), dtype=dtype) - @property - def ndim(self: Any) -> int: - """ - Number of array dimensions. - - See Also - -------- - numpy.ndarray.ndim - """ - return len(self.shape) - - @property - def size(self: Any) -> int: - """ - Number of elements in the array. - - Equal to ``np.prod(a.shape)``, i.e., the product of the array’s dimensions. - - See Also - -------- - numpy.ndarray.size - """ - return math.prod(self.shape) - - def __len__(self: Any) -> int: - try: - return self.shape[0] - except IndexError: - raise TypeError("len() of unsized object") - - @property - def dtype(self: Any) -> np.dtype: - return self.array.dtype - - @property - def shape(self: Any) -> tuple[int, ...]: - return self.array.shape - - def __repr__(self: Any) -> str: - return f"{type(self).__name__}(array={self.array!r})" - def get_duck_array(self): return self.array.get_duck_array() diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 54367a9da26..7173640706e 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -41,6 +41,7 @@ import inspect import io import itertools +import math import os import re import sys @@ -540,6 +541,69 @@ def __repr__(self) -> str: return f"{type(self).__name__}({list(self)!r})" +class NdimSizeLenMixin: + """Mixin class that extends a class that defines a ``shape`` property to + one that also defines ``ndim``, ``size`` and ``__len__``. + """ + + __slots__ = () + + @property + def ndim(self: Any) -> int: + """ + Number of array dimensions. + + See Also + -------- + numpy.ndarray.ndim + """ + return len(self.shape) + + @property + def size(self: Any) -> int: + """ + Number of elements in the array. + + Equal to ``np.prod(a.shape)``, i.e., the product of the array’s dimensions. + + See Also + -------- + numpy.ndarray.size + """ + return math.prod(self.shape) + + def __len__(self: Any) -> int: + try: + return self.shape[0] + except IndexError: + raise TypeError("len() of unsized object") + + +class NDArrayMixin(NdimSizeLenMixin): + """Mixin class for making wrappers of N-dimensional arrays that conform to + the ndarray interface required for the data argument to Variable objects. + + A subclass should set the `array` property and override one or more of + `dtype`, `shape` and `__getitem__`. + """ + + __slots__ = () + + @property + def dtype(self: Any) -> np.dtype: + return self.array.dtype + + @property + def shape(self: Any) -> tuple[int, ...]: + return self.array.shape + + def __getitem__(self: Any, key): + return self.array[key] + + def __repr__(self: Any) -> str: + return f"{type(self).__name__}(array={self.array!r})" + + class ReprObject: """Object that prints as the given value, for use with sentinel values.""" diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 2ddd61aaedd..7e1b964ecba 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -1,7 +1,6 @@ from __future__ import annotations import importlib -import math import platform import warnings from contextlib import contextmanager, nullcontext @@ -16,6 +15,7 @@ import xarray.testing from xarray import Dataset +from xarray.core import utils from xarray.core.duck_array_ops import allclose_or_equiv # noqa: F401 from xarray.core.indexing import ExplicitlyIndexed from xarray.core.options import set_options @@ -138,7 +138,7 @@ class UnexpectedDataAccess(Exception): pass -class InaccessibleArray(ExplicitlyIndexed): +class InaccessibleArray(utils.NDArrayMixin, ExplicitlyIndexed): """Disallows any loading.""" def __init__(self, array): @@ -153,31 +153,6 @@ def __array__(self, dtype: np.typing.DTypeLike = None): def __getitem__(self, key): raise UnexpectedDataAccess("Tried accessing data.") - @property - def ndim(self) -> int: - return len(self.array.shape) - - @property - def size(self) -> int: - return math.prod(self.array.shape) - - @property - def dtype(self) -> np.dtype: - return self.array.dtype - - @property - def shape(self) -> tuple[int, ...]: - return self.array.shape - - def __len__(self) -> int: - try: - return self.array.shape[0] - except IndexError: - raise TypeError("len() of unsized object") - - def __repr__(self) -> str: - return f"{type(self).__name__}(array={self.array!r})" - class FirstElementAccessibleArray(InaccessibleArray): def __getitem__(self, key): @@ -187,7 +162,7 @@ def __getitem__(self, key): return self.array[tuple_idxr] -class DuckArrayWrapper: +class DuckArrayWrapper(utils.NDArrayMixin): """Array-like that prevents casting to array. Modeled after cupy.""" @@ -203,31 +178,6 @@ def __array__(self, dtype: np.typing.DTypeLike = None): def __array_namespace__(self): """Present to satisfy is_duck_array test.""" - @property - def ndim(self) -> int: - return len(self.array.shape) - - @property - def size(self) -> int: - return math.prod(self.array.shape) - - @property - def dtype(self) -> np.dtype: - return self.array.dtype - - @property - def shape(self) -> tuple[int, ...]: - return self.array.shape - - def __len__(self) -> int: - try: - return self.array.shape[0] - except IndexError: - raise TypeError("len() of unsized object") - - def __repr__(self) -> str: - return f"{type(self).__name__}(array={self.array!r})" - class ReturnItem: def __getitem__(self, key): diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 69796fda2de..f30cdcf3f73 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1,7 +1,5 @@ from __future__ import annotations -import math -import typing import warnings from abc import ABC, abstractmethod from copy import copy, deepcopy @@ -28,6 +26,7 @@ VectorizedIndexer, ) from xarray.core.pycompat import array_type +from xarray.core.utils import NDArrayMixin from xarray.core.variable import as_compatible_data, as_variable from xarray.tests import ( assert_allclose, @@ -2693,54 +2692,10 @@ def test_ones_like(self) -> None: def test_unsupported_type(self): # Non indexable type - class CustomArray: + class CustomArray(NDArrayMixin): def __init__(self, array): self.array = array - @property - def ndim(self: typing.Any) -> int: - """ - Number of array dimensions. - - See Also - -------- - numpy.ndarray.ndim - """ - return len(self.shape) - - @property - def size(self: typing.Any) -> int: - """ - Number of elements in the array. - - Equal to ``np.prod(a.shape)``, i.e., the product of the array’s dimensions. - - See Also - -------- - numpy.ndarray.size - """ - return math.prod(self.shape) - - def __len__(self: typing.Any) -> int: - try: - return self.shape[0] - except IndexError: - raise TypeError("len() of unsized object") - - @property - def dtype(self: typing.Any) -> np.dtype: - return self.array.dtype - - @property - def shape(self: typing.Any) -> tuple[int, ...]: - return self.array.shape - - def __getitem__(self: typing.Any, key): - return self.array[key] - - def __repr__(self: typing.Any) -> str: - return f"{type(self).__name__}(array={self.array!r})" - class CustomIndexable(CustomArray, indexing.ExplicitlyIndexed): pass From ab79fb1d5a7873c2302ab8c93c1f41951f5dcf49 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 22 Aug 2023 11:30:52 -0600 Subject: [PATCH 16/67] [WIP] as_compatible_data refactor --- xarray/core/variable.py | 17 +++++------------ xarray/namedarray/core.py | 30 ++++++++++++++++++++++++++++-- 2 files changed, 33 insertions(+), 14 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 0dcde2f2e9c..3de7898e616 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -246,7 +246,7 @@ def as_compatible_data(data, fastpath: bool = False): if isinstance(data, (Variable, DataArray)): return data.data - if isinstance(data, NON_NUMPY_SUPPORTED_ARRAY_TYPES): + if isinstance(data, pd.Index): data = _possibly_convert_datetime_or_timedelta_index(data) return _maybe_wrap_data(data) @@ -261,7 +261,7 @@ def as_compatible_data(data, fastpath: bool = False): data = np.timedelta64(getattr(data, "value", data), "ns") # we don't want nested self-described arrays - if isinstance(data, (pd.Series, pd.Index, pd.DataFrame)): + if isinstance(data, (pd.Series, pd.DataFrame)): data = data.values if isinstance(data, np.ma.MaskedArray): @@ -272,11 +272,6 @@ def as_compatible_data(data, fastpath: bool = False): else: data = np.asarray(data) - if not isinstance(data, np.ndarray) and ( - hasattr(data, "__array_function__") or hasattr(data, "__array_namespace__") - ): - return data - # validate whether the data is valid data types. data = np.asarray(data) @@ -351,12 +346,10 @@ def __init__(self, dims, data, attrs=None, encoding=None, fastpath=False): Well-behaved code to serialize a Variable should ignore unrecognized encoding items. """ - self._data = as_compatible_data(data, fastpath=fastpath) - self._dims = self._parse_dimensions(dims) - self._attrs = None + super().__init__( + data=as_compatible_data(data, fastpath=fastpath), dims=dims, attrs=attrs + ) self._encoding = None - if attrs is not None: - self.attrs = attrs if encoding is not None: self.encoding = encoding diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 0a36014f2fa..7c6e15296f3 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -9,21 +9,47 @@ import numpy as np import numpy.typing as npt -from xarray.namedarray.utils import Frozen, _default, is_duck_dask_array +from xarray.namedarray.utils import Frozen, _default, is_duck_dask_array, is_duck_array + +# TODO: get rid of this after migrating this class +# to array API +from xarray.core.indexing import ExplicitlyIndexed # temporary placeholder for indicating that an array api compliant # type. hopefully in the future we can narrow this down more T_Array = typing.TypeVar("T_Array", bound=typing.Any) +# TODO: Add tests! +def as_compatible_data(data, fastpath: bool = False): + if fastpath and getattr(data, "ndim", 0) > 0: + # can't use fastpath (yet) for scalars + return data + + # TODO : check scalar + if is_duck_array(data): + return data + if isinstance(data, NamedArray): + raise ValueError + if isinstance(data, np.ma.MaskedArray): + raise ValueError + if isinstance(data, ExplicitlyIndexed): + return data + if isinstance(data, tuple): + data = utils.to_0d_object_array(data) + + # validate whether the data is valid data types. + return np.asarray(data) + + class NamedArray: __slots__ = ("_dims", "_data", "_attrs") def __init__( self, dims, data: npt.ArrayLike, attrs: dict[typing.Any, typing.Any] = None ): + self._data = as_compatible_data(data) self._dims = self._parse_dimensions(dims) - self._data = data self._attrs = attrs or {} @property From e70e98a2bf319c69b8a3eac6368e729bb21953fd Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 22 Aug 2023 17:32:20 +0000 Subject: [PATCH 17/67] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/namedarray/core.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 7c6e15296f3..78dd9a5ee03 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -9,11 +9,10 @@ import numpy as np import numpy.typing as npt -from xarray.namedarray.utils import Frozen, _default, is_duck_dask_array, is_duck_array - # TODO: get rid of this after migrating this class # to array API from xarray.core.indexing import ExplicitlyIndexed +from xarray.namedarray.utils import Frozen, _default, is_duck_array, is_duck_dask_array # temporary placeholder for indicating that an array api compliant # type. hopefully in the future we can narrow this down more From a393d7f145f01ceb4f87fdb00345a2045ad65ae4 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 22 Aug 2023 17:25:44 -0700 Subject: [PATCH 18/67] duplicate sentinel value and leave the original sentinel object alone --- xarray/core/dataarray.py | 3 ++- xarray/core/dataset.py | 3 ++- xarray/core/utils.py | 9 +++++++++ xarray/core/variable.py | 2 +- 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 34b9178b381..df57ad898e4 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -42,8 +42,10 @@ from xarray.core.merge import PANDAS_TYPES, MergeError from xarray.core.options import OPTIONS, _get_keep_attrs from xarray.core.utils import ( + Default, HybridMappingProxy, ReprObject, + _default, either_dict_or_kwargs, emit_user_level_warning, ) @@ -53,7 +55,6 @@ as_compatible_data, as_variable, ) -from xarray.namedarray.utils import Default, _default from xarray.plot.accessor import DataArrayPlotAccessor from xarray.plot.utils import _get_units_from_attrs diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index e0067950297..bdf2d8babe1 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -91,9 +91,11 @@ ) from xarray.core.types import QuantileMethods, T_Dataset from xarray.core.utils import ( + Default, Frozen, HybridMappingProxy, OrderedSet, + _default, decode_numpy_dict_values, drop_dims_from_indexers, either_dict_or_kwargs, @@ -109,7 +111,6 @@ broadcast_variables, calculate_dimensions, ) -from xarray.namedarray.utils import Default, _default from xarray.plot.accessor import DatasetPlotAccessor if TYPE_CHECKING: diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 7173640706e..bd0ca57f33c 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -57,6 +57,7 @@ MutableSet, Sequence, ) +from enum import Enum from typing import ( TYPE_CHECKING, Any, @@ -1107,6 +1108,14 @@ def __get__(self, obj: None | object, cls) -> type[_Accessor] | _Accessor: return self._accessor(obj) # type: ignore # assume it is a valid accessor! +# Singleton type, as per https://github.com/python/typing/pull/240 +class Default(Enum): + token = 0 + + +_default = Default.token + + def iterate_nested(nested_list): for item in nested_list: if isinstance(item, list): diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 3de7898e616..cbf1aec2f19 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -36,6 +36,7 @@ ) from xarray.core.utils import ( OrderedSet, + _default, decode_numpy_dict_values, drop_dims_from_indexers, either_dict_or_kwargs, @@ -45,7 +46,6 @@ maybe_coerce_to_str, ) from xarray.namedarray.core import NamedArray -from xarray.namedarray.utils import _default NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( indexing.ExplicitlyIndexed, From 7b8316e337d4e8f85712786604345f47b0a16d7a Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com> Date: Tue, 22 Aug 2023 17:31:37 -0700 Subject: [PATCH 19/67] Apply suggestions from code review Co-authored-by: Stephan Hoyer --- xarray/namedarray/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 78dd9a5ee03..e2acbabad87 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -49,7 +49,7 @@ def __init__( ): self._data = as_compatible_data(data) self._dims = self._parse_dimensions(dims) - self._attrs = attrs or {} + self._attrs = None if attrs is None else dict(attrs) @property def ndim(self) -> int: From d74b802eea4f6cc6625a8b31de0aed332e2ab1da Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 22 Aug 2023 17:48:59 -0700 Subject: [PATCH 20/67] use DuckArray --- xarray/namedarray/core.py | 32 ++++++++++++++++++-------------- xarray/namedarray/utils.py | 7 +++++++ 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index e2acbabad87..ba3e908eaaf 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -7,16 +7,20 @@ from collections.abc import Hashable, Iterable, Mapping import numpy as np -import numpy.typing as npt -# TODO: get rid of this after migrating this class -# to array API +# TODO: get rid of this after migrating this class to array API from xarray.core.indexing import ExplicitlyIndexed -from xarray.namedarray.utils import Frozen, _default, is_duck_array, is_duck_dask_array +from xarray.namedarray.utils import ( + Frozen, + _default, + is_duck_array, + is_duck_dask_array, + to_0d_object_array, +) -# temporary placeholder for indicating that an array api compliant -# type. hopefully in the future we can narrow this down more -T_Array = typing.TypeVar("T_Array", bound=typing.Any) +# temporary placeholder for indicating an array api compliant type. +# hopefully in the future we can narrow this down more +DuckArray = typing.TypeVar("DuckArray", bound=typing.Any) # TODO: Add tests! @@ -35,7 +39,7 @@ def as_compatible_data(data, fastpath: bool = False): if isinstance(data, ExplicitlyIndexed): return data if isinstance(data, tuple): - data = utils.to_0d_object_array(data) + data = to_0d_object_array(data) # validate whether the data is valid data types. return np.asarray(data) @@ -45,7 +49,7 @@ class NamedArray: __slots__ = ("_dims", "_data", "_attrs") def __init__( - self, dims, data: npt.ArrayLike, attrs: dict[typing.Any, typing.Any] = None + self, dims, data: DuckArray, attrs: dict[typing.Any, typing.Any] = None ): self._data = as_compatible_data(data) self._dims = self._parse_dimensions(dims) @@ -147,7 +151,7 @@ def attrs(self, value: Mapping[typing.Any, typing.Any]) -> None: self._attrs = dict(value) @property - def data(self) -> T_Array: + def data(self) -> DuckArray: """ The Variable's data as an array. The underlying array type (e.g. dask, sparse, pint) is preserved. @@ -161,7 +165,7 @@ def data(self) -> T_Array: return self._data - def _check_shape(self, new_data): + def _check_shape(self, new_data: DuckArray): if new_data.shape != self.shape: raise ValueError( f"replacement data must match the Variable's shape. " @@ -169,7 +173,7 @@ def _check_shape(self, new_data): ) @data.setter - def data(self, data): + def data(self, data: DuckArray) -> None: self._check_shape(data) self._data = data @@ -278,7 +282,7 @@ def _replace(self, dims=_default, data=_default, attrs=_default): def _copy( self, deep: bool = True, - data: npt.ArrayLike | None = None, + data: DuckArray | None = None, memo: dict[int, typing.Any] | None = None, ): if data is None: @@ -302,7 +306,7 @@ def __copy__(self): def __deepcopy__(self, memo: dict[int, typing.Any] | None = None): return self._copy(deep=True, memo=memo) - def copy(self, deep: bool = True, data: npt.ArrayLike | None = None): + def copy(self, deep: bool = True, data: DuckArray | None = None): """Returns a copy of this object. If `deep=True`, the data array is loaded into memory and copied onto diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py index 55eb766d986..b3ea0a2d573 100644 --- a/xarray/namedarray/utils.py +++ b/xarray/namedarray/utils.py @@ -89,3 +89,10 @@ def is_duck_array(value: typing.Any) -> bool: def is_duck_dask_array(x): return is_duck_array(x) and is_dask_collection(x) + + +def to_0d_object_array(value: typing.Any) -> np.ndarray: + """Given a value, wrap it in a 0-D numpy.ndarray with dtype=object.""" + result = np.empty((), dtype=object) + result[()] = value + return result From acfdb9070647c51b5b2f26d237add0ff883facf9 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com> Date: Tue, 22 Aug 2023 17:58:52 -0700 Subject: [PATCH 21/67] Apply suggestions from code review Co-authored-by: Stephan Hoyer --- xarray/namedarray/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index ba3e908eaaf..3edcae1565f 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -147,7 +147,7 @@ def attrs(self) -> dict[typing.Any, typing.Any]: return self._attrs @attrs.setter - def attrs(self, value: Mapping[typing.Any, typing.Any]) -> None: + def attrs(self, value: Mapping) -> None: self._attrs = dict(value) @property From 2ece3c00f82d0b375241ddcc72a3b61634c09fcc Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 23 Aug 2023 11:27:40 -0700 Subject: [PATCH 22/67] use sentinel value from xarray --- xarray/namedarray/core.py | 10 +++++----- xarray/namedarray/utils.py | 9 --------- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 3edcae1565f..e7cd3ba676b 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -10,9 +10,9 @@ # TODO: get rid of this after migrating this class to array API from xarray.core.indexing import ExplicitlyIndexed +from xarray.core.utils import _default from xarray.namedarray.utils import ( Frozen, - _default, is_duck_array, is_duck_dask_array, to_0d_object_array, @@ -49,11 +49,11 @@ class NamedArray: __slots__ = ("_dims", "_data", "_attrs") def __init__( - self, dims, data: DuckArray, attrs: dict[typing.Any, typing.Any] = None + self, dims: str | Iterable[Hashable], data: DuckArray, attrs: dict | None = None ): - self._data = as_compatible_data(data) - self._dims = self._parse_dimensions(dims) - self._attrs = None if attrs is None else dict(attrs) + self._data: DuckArray = as_compatible_data(data) + self._dims: tuple[Hashable, ...] = self._parse_dimensions(dims) + self._attrs: dict | None = None if attrs else dict(attrs) @property def ndim(self) -> int: diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py index b3ea0a2d573..8c48e4acb33 100644 --- a/xarray/namedarray/utils.py +++ b/xarray/namedarray/utils.py @@ -1,6 +1,5 @@ from __future__ import annotations -import enum import importlib import typing from collections.abc import Iterator, Mapping @@ -12,14 +11,6 @@ T = typing.TypeVar("T") -# Singleton type, as per https://github.com/python/typing/pull/240 -class Default(enum.Enum): - token = 0 - - -_default = Default.token - - class Frozen(Mapping[K, V]): """Wrapper around an object implementing the mapping interface to make it immutable. If you really want to modify the mapping, the mutable version is From 6fb79e6d7779f1187fa26b79ab6a72f7984cf024 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 23 Aug 2023 13:47:42 -0700 Subject: [PATCH 23/67] remove unused code --- xarray/namedarray/core.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index e7cd3ba676b..10aadbd42f8 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -1,7 +1,6 @@ from __future__ import annotations import copy -import itertools import math import typing from collections.abc import Hashable, Iterable, Mapping @@ -53,7 +52,7 @@ def __init__( ): self._data: DuckArray = as_compatible_data(data) self._dims: tuple[Hashable, ...] = self._parse_dimensions(dims) - self._attrs: dict | None = None if attrs else dict(attrs) + self._attrs: dict | None = dict(attrs) if attrs else None @property def ndim(self) -> int: @@ -363,9 +362,3 @@ def copy(self, deep: bool = True, data: DuckArray | None = None): pandas.DataFrame.copy """ return self._copy(deep=deep, data=data) - - # mutable objects should not be hashable - # https://github.com/python/mypy/issues/4266 - __hash__ = None # type: ignore[assignment] - - _array_counter = itertools.count() From 9545ca23ad38c8120becc6a1b82fb9eeac8df57b Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 23 Aug 2023 15:35:46 -0700 Subject: [PATCH 24/67] fix variable constructor --- xarray/core/variable.py | 27 +++++++++++++++++---------- xarray/namedarray/core.py | 3 ++- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index cbf1aec2f19..c1b8824a48e 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -246,7 +246,7 @@ def as_compatible_data(data, fastpath: bool = False): if isinstance(data, (Variable, DataArray)): return data.data - if isinstance(data, pd.Index): + if isinstance(data, NON_NUMPY_SUPPORTED_ARRAY_TYPES): data = _possibly_convert_datetime_or_timedelta_index(data) return _maybe_wrap_data(data) @@ -272,6 +272,11 @@ def as_compatible_data(data, fastpath: bool = False): else: data = np.asarray(data) + if not isinstance(data, np.ndarray) and ( + hasattr(data, "__array_function__") or hasattr(data, "__array_namespace__") + ): + return data + # validate whether the data is valid data types. data = np.asarray(data) @@ -347,8 +352,9 @@ def __init__(self, dims, data, attrs=None, encoding=None, fastpath=False): unrecognized encoding items. """ super().__init__( - data=as_compatible_data(data, fastpath=fastpath), dims=dims, attrs=attrs + dims=dims, data=as_compatible_data(data, fastpath=fastpath), attrs=attrs ) + self._encoding = None if encoding is not None: self.encoding = encoding @@ -384,11 +390,7 @@ def data(self) -> Any: @data.setter def data(self, data): data = as_compatible_data(data) - if data.shape != self.shape: - raise ValueError( - f"replacement data must match the Variable's shape. " - f"replacement data has shape {data.shape}; Variable has shape {self.shape}" - ) + self._check_shape(data) self._data = data def astype( @@ -919,11 +921,16 @@ def _replace( attrs=_default, encoding=_default, ) -> T_Variable: - new_object = super()._replace(dims, data, attrs) + if dims is _default: + dims = copy.copy(self._dims) + if data is _default: + data = copy.copy(self.data) + if attrs is _default: + attrs = copy.copy(self._attrs) + if encoding is _default: encoding = copy.copy(self._encoding) - new_object._encoding = encoding - return new_object + return type(self)(dims, data, attrs, encoding, fastpath=True) def chunk( self, diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 10aadbd42f8..5d71c1839e6 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -164,7 +164,7 @@ def data(self) -> DuckArray: return self._data - def _check_shape(self, new_data: DuckArray): + def _check_shape(self, new_data: DuckArray) -> None: if new_data.shape != self.shape: raise ValueError( f"replacement data must match the Variable's shape. " @@ -173,6 +173,7 @@ def _check_shape(self, new_data: DuckArray): @data.setter def data(self, data: DuckArray) -> None: + data = as_compatible_data(data) self._check_shape(data) self._data = data From e41a27c23d8cc9e3e785bf9aa3e8bd21a52036f9 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 23 Aug 2023 16:26:12 -0700 Subject: [PATCH 25/67] fix as_compatible_data utility function --- xarray/namedarray/core.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 5d71c1839e6..96a4377883a 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -37,6 +37,11 @@ def as_compatible_data(data, fastpath: bool = False): raise ValueError if isinstance(data, ExplicitlyIndexed): return data + + if not isinstance(data, np.ndarray) and ( + hasattr(data, "__array_function__") or hasattr(data, "__array_namespace__") + ): + return data if isinstance(data, tuple): data = to_0d_object_array(data) From 259e0bda04947b228526d525474dcebfb1cee132 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 23 Aug 2023 16:48:52 -0700 Subject: [PATCH 26/67] move _to_dense and _non_zero to NamedArray --- xarray/core/variable.py | 37 ----------------------------- xarray/namedarray/core.py | 49 ++++++++++++++++++++++++++++++++++----- 2 files changed, 43 insertions(+), 43 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index c1b8824a48e..0e5b6132f21 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -709,13 +709,6 @@ def _broadcast_indexes_outer(self, key): return dims, OuterIndexer(tuple(new_key)), None - def _nonzero(self): - """Equivalent numpy's nonzero but returns a tuple of Variables.""" - # TODO we should replace dask's native nonzero - # after https://github.com/dask/dask/issues/1076 is implemented. - nonzeros = np.nonzero(self.data) - return tuple(Variable((dim), nz) for nz, dim in zip(nonzeros, self.dims)) - def _broadcast_indexes_vectorized(self, key): variables = [] out_dims_set = OrderedSet() @@ -1080,36 +1073,6 @@ def as_numpy(self: T_Variable) -> T_Variable: """Coerces wrapped data into a numpy array, returning a Variable.""" return self._replace(data=self.to_numpy()) - def _as_sparse(self, sparse_format=_default, fill_value=dtypes.NA): - """ - use sparse-array as backend. - """ - import sparse - - # TODO: what to do if dask-backended? - if fill_value is dtypes.NA: - dtype, fill_value = dtypes.maybe_promote(self.dtype) - else: - dtype = dtypes.result_type(self.dtype, fill_value) - - if sparse_format is _default: - sparse_format = "coo" - try: - as_sparse = getattr(sparse, f"as_{sparse_format.lower()}") - except AttributeError: - raise ValueError(f"{sparse_format} is not a valid sparse format") - - data = as_sparse(self.data.astype(dtype), fill_value=fill_value) - return self._replace(data=data) - - def _to_dense(self): - """ - Change backend from sparse to np.array - """ - if hasattr(self._data, "todense"): - return self._replace(data=self._data.todense()) - return self.copy(deep=False) - def isel( self: T_Variable, indexers: Mapping[Any, Any] | None = None, diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 96a4377883a..c81148c2684 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -8,6 +8,7 @@ import numpy as np # TODO: get rid of this after migrating this class to array API +from xarray.core import dtypes from xarray.core.indexing import ExplicitlyIndexed from xarray.core.utils import _default from xarray.namedarray.utils import ( @@ -255,7 +256,7 @@ def chunks(self) -> tuple[tuple[int, ...], ...] | None: return getattr(self._data, "chunks", None) @property - def chunksizes(self) -> typing.Mapping[typing.Any, tuple[int, ...]]: + def chunksizes(self) -> typing.Mapping[typing.Any, tuple[int, ...]] | None: """ Mapping from dimension names to block lengths for this variable's data, or None if the underlying data is not a dask array. @@ -273,9 +274,9 @@ def chunksizes(self) -> typing.Mapping[typing.Any, tuple[int, ...]]: if hasattr(self._data, "chunks"): return Frozen(dict(zip(self.dims, self.data.chunks))) else: - return {} + return None - def _replace(self, dims=_default, data=_default, attrs=_default): + def _replace(self, dims=_default, data=_default, attrs=_default) -> NamedArray: if dims is _default: dims = copy.copy(self._dims) if data is _default: @@ -292,11 +293,10 @@ def _copy( ): if data is None: ndata = self._data - if deep: ndata = copy.deepcopy(ndata, memo=memo) else: - ndata = data + ndata = as_compatible_data(data) self._check_shape(ndata) attrs = ( @@ -305,7 +305,7 @@ def _copy( return self._replace(data=ndata, attrs=attrs) - def __copy__(self): + def __copy__(self) -> NamedArray: return self._copy(deep=False) def __deepcopy__(self, memo: dict[int, typing.Any] | None = None): @@ -368,3 +368,40 @@ def copy(self, deep: bool = True, data: DuckArray | None = None): pandas.DataFrame.copy """ return self._copy(deep=deep, data=data) + + def _nonzero(self): + """Equivalent numpy's nonzero but returns a tuple of Variables.""" + # TODO we should replace dask's native nonzero + # after https://github.com/dask/dask/issues/1076 is implemented. + nonzeros = np.nonzero(self.data) + return tuple(type(self)((dim), nz) for nz, dim in zip(nonzeros, self.dims)) + + def _as_sparse(self, sparse_format: str = _default, fill_value=dtypes.NA): + """ + use sparse-array as backend. + """ + import sparse + + # TODO: what to do if dask-backended? + if fill_value is dtypes.NA: + dtype, fill_value = dtypes.maybe_promote(self.dtype) + else: + dtype = dtypes.result_type(self.dtype, fill_value) + + if sparse_format is _default: + sparse_format = "coo" + try: + as_sparse = getattr(sparse, f"as_{sparse_format.lower()}") + except AttributeError: + raise ValueError(f"{sparse_format} is not a valid sparse format") + + data = as_sparse(self.data.astype(dtype), fill_value=fill_value) + return self._replace(data=data) + + def _to_dense(self): + """ + Change backend from sparse to np.array + """ + if hasattr(self._data, "todense"): + return self._replace(data=self._data.todense()) + return self.copy(deep=False) From a7ec7701d905c807567644142b67687cd305ca5d Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 24 Aug 2023 15:31:34 -0700 Subject: [PATCH 27/67] more typing --- xarray/namedarray/core.py | 127 ++++++++++++++++++++++--------------- xarray/namedarray/utils.py | 4 +- 2 files changed, 78 insertions(+), 53 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index c81148c2684..09f8d5f24c5 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -10,7 +10,7 @@ # TODO: get rid of this after migrating this class to array API from xarray.core import dtypes from xarray.core.indexing import ExplicitlyIndexed -from xarray.core.utils import _default +from xarray.core.utils import Default, _default from xarray.namedarray.utils import ( Frozen, is_duck_array, @@ -20,7 +20,9 @@ # temporary placeholder for indicating an array api compliant type. # hopefully in the future we can narrow this down more -DuckArray = typing.TypeVar("DuckArray", bound=typing.Any) +T_DuckArray = typing.TypeVar("T_DuckArray", bound=typing.Any) + +T_NamedArray = typing.TypeVar("T_NamedArray", bound="NamedArray") # TODO: Add tests! @@ -54,14 +56,17 @@ class NamedArray: __slots__ = ("_dims", "_data", "_attrs") def __init__( - self, dims: str | Iterable[Hashable], data: DuckArray, attrs: dict | None = None + self, + dims: str | Iterable[Hashable], + data: T_DuckArray, + attrs: dict | None = None, ): - self._data: DuckArray = as_compatible_data(data) + self._data: T_DuckArray = as_compatible_data(data) self._dims: tuple[Hashable, ...] = self._parse_dimensions(dims) self._attrs: dict | None = dict(attrs) if attrs else None @property - def ndim(self) -> int: + def ndim(self: T_NamedArray) -> int: """ Number of array dimensions. @@ -72,7 +77,7 @@ def ndim(self) -> int: return len(self.shape) @property - def size(self) -> int: + def size(self: T_NamedArray) -> int: """ Number of elements in the array. @@ -87,11 +92,11 @@ def size(self) -> int: def __len__(self) -> int: try: return self.shape[0] - except IndexError: - raise TypeError("len() of unsized object") + except Exception as exc: + raise TypeError("len() of unsized object") from exc @property - def dtype(self) -> np.dtype: + def dtype(self: T_NamedArray) -> np.dtype: """ Data-type of the array’s elements. @@ -103,7 +108,7 @@ def dtype(self) -> np.dtype: return self._data.dtype @property - def shape(self) -> tuple[int, ...]: + def shape(self: T_NamedArray) -> tuple[int, ...]: """ Tuple of array dimensions. @@ -114,7 +119,7 @@ def shape(self) -> tuple[int, ...]: return self._data.shape @property - def nbytes(self) -> int: + def nbytes(self: T_NamedArray) -> int: """ Total bytes consumed by the elements of the data array. @@ -127,15 +132,17 @@ def nbytes(self) -> int: return self.size * self.dtype.itemsize @property - def dims(self) -> tuple[Hashable, ...]: + def dims(self: T_NamedArray) -> tuple[Hashable, ...]: """Tuple of dimension names with which this variable is associated.""" return self._dims @dims.setter - def dims(self, value: str | Iterable[Hashable]) -> None: + def dims(self: T_NamedArray, value: str | Iterable[Hashable]) -> None: self._dims = self._parse_dimensions(value) - def _parse_dimensions(self, dims: str | Iterable[Hashable]) -> tuple[Hashable, ...]: + def _parse_dimensions( + self: T_NamedArray, dims: str | Iterable[Hashable] + ) -> tuple[Hashable, ...]: dims = (dims,) if isinstance(dims, str) else tuple(dims) if len(dims) != self.ndim: raise ValueError( @@ -145,18 +152,25 @@ def _parse_dimensions(self, dims: str | Iterable[Hashable]) -> tuple[Hashable, . return dims @property - def attrs(self) -> dict[typing.Any, typing.Any]: + def attrs(self: T_NamedArray) -> dict[typing.Any, typing.Any]: """Dictionary of local attributes on this variable.""" if self._attrs is None: self._attrs = {} return self._attrs @attrs.setter - def attrs(self, value: Mapping) -> None: + def attrs(self: T_NamedArray, value: Mapping) -> None: self._attrs = dict(value) + def _check_shape(self, new_data: T_DuckArray) -> None: + if new_data.shape != self.shape: + raise ValueError( + f"replacement data must match the Variable's shape. " + f"replacement data has shape {new_data.shape}; Variable has shape {self.shape}" + ) + @property - def data(self) -> DuckArray: + def data(self: T_NamedArray): """ The Variable's data as an array. The underlying array type (e.g. dask, sparse, pint) is preserved. @@ -170,21 +184,14 @@ def data(self) -> DuckArray: return self._data - def _check_shape(self, new_data: DuckArray) -> None: - if new_data.shape != self.shape: - raise ValueError( - f"replacement data must match the Variable's shape. " - f"replacement data has shape {new_data.shape}; Variable has shape {self.shape}" - ) - @data.setter - def data(self, data: DuckArray) -> None: + def data(self: T_NamedArray, data: T_DuckArray) -> None: data = as_compatible_data(data) self._check_shape(data) self._data = data @property - def real(self): + def real(self: T_NamedArray) -> T_NamedArray: """ The real part of the variable. @@ -195,7 +202,7 @@ def real(self): return self._replace(data=self.data.real) @property - def imag(self): + def imag(self: T_NamedArray) -> T_NamedArray: """ The imaginary part of the variable. @@ -205,44 +212,50 @@ def imag(self): """ return self._replace(data=self.data.imag) - def __dask_tokenize__(self): + def __dask_tokenize__(self: T_NamedArray): # Use v.data, instead of v._data, in order to cope with the wrappers # around NetCDF and the like from dask.base import normalize_token - return normalize_token((type(self), self._dims, self.data, self._attrs)) + return normalize_token((type(self), self._dims, self.data, self.attrs)) - def __dask_graph__(self): + def __dask_graph__(self: T_NamedArray): return self._data.__dask_graph__() if is_duck_dask_array(self._data) else None - def __dask_keys__(self): + def __dask_keys__(self: T_NamedArray): return self._data.__dask_keys__() - def __dask_layers__(self): + def __dask_layers__(self: T_NamedArray): return self._data.__dask_layers__() @property - def __dask_optimize__(self): + def __dask_optimize__(self: T_NamedArray) -> typing.Callable: return self._data.__dask_optimize__ @property - def __dask_scheduler__(self): + def __dask_scheduler__(self: T_NamedArray) -> typing.Callable: return self._data.__dask_scheduler__ - def __dask_postcompute__(self): + def __dask_postcompute__( + self: T_NamedArray, + ) -> tuple[typing.Callable, tuple[typing.Any, ...]]: array_func, array_args = self._data.__dask_postcompute__() return self._dask_finalize, (array_func,) + array_args - def __dask_postpersist__(self): + def __dask_postpersist__( + self: T_NamedArray, + ) -> tuple[typing.Callable, tuple[typing.Any, ...]]: array_func, array_args = self._data.__dask_postpersist__() return self._dask_finalize, (array_func,) + array_args - def _dask_finalize(self, results, array_func, *args, **kwargs): + def _dask_finalize( + self: T_NamedArray, results, array_func, *args, **kwargs + ) -> T_NamedArray: data = array_func(results, *args, **kwargs) return type(self)(self._dims, data, attrs=self._attrs) @property - def chunks(self) -> tuple[tuple[int, ...], ...] | None: + def chunks(self: T_NamedArray) -> tuple[tuple[int, ...], ...] | None: """ Tuple of block lengths for this dataarray's data, in order of dimensions, or None if the underlying data is not a dask array. @@ -256,7 +269,9 @@ def chunks(self) -> tuple[tuple[int, ...], ...] | None: return getattr(self._data, "chunks", None) @property - def chunksizes(self) -> typing.Mapping[typing.Any, tuple[int, ...]] | None: + def chunksizes( + self: T_NamedArray, + ) -> typing.Mapping[typing.Any, tuple[int, ...]] | None: """ Mapping from dimension names to block lengths for this variable's data, or None if the underlying data is not a dask array. @@ -276,7 +291,9 @@ def chunksizes(self) -> typing.Mapping[typing.Any, tuple[int, ...]] | None: else: return None - def _replace(self, dims=_default, data=_default, attrs=_default) -> NamedArray: + def _replace( + self: T_NamedArray, dims=_default, data=_default, attrs=_default + ) -> T_NamedArray: if dims is _default: dims = copy.copy(self._dims) if data is _default: @@ -286,11 +303,11 @@ def _replace(self, dims=_default, data=_default, attrs=_default) -> NamedArray: return type(self)(dims, data, attrs) def _copy( - self, + self: T_NamedArray, deep: bool = True, - data: DuckArray | None = None, + data: T_DuckArray | None = None, memo: dict[int, typing.Any] | None = None, - ): + ) -> T_NamedArray: if data is None: ndata = self._data if deep: @@ -305,13 +322,17 @@ def _copy( return self._replace(data=ndata, attrs=attrs) - def __copy__(self) -> NamedArray: + def __copy__(self: T_NamedArray) -> T_NamedArray: return self._copy(deep=False) - def __deepcopy__(self, memo: dict[int, typing.Any] | None = None): + def __deepcopy__( + self: T_NamedArray, memo: dict[int, typing.Any] | None = None + ) -> T_NamedArray: return self._copy(deep=True, memo=memo) - def copy(self, deep: bool = True, data: DuckArray | None = None): + def copy( + self: T_NamedArray, deep: bool = True, data: T_DuckArray | None = None + ) -> T_NamedArray: """Returns a copy of this object. If `deep=True`, the data array is loaded into memory and copied onto @@ -369,14 +390,18 @@ def copy(self, deep: bool = True, data: DuckArray | None = None): """ return self._copy(deep=deep, data=data) - def _nonzero(self): + def _nonzero(self: T_NamedArray) -> tuple[T_NamedArray, ...]: """Equivalent numpy's nonzero but returns a tuple of Variables.""" # TODO we should replace dask's native nonzero # after https://github.com/dask/dask/issues/1076 is implemented. nonzeros = np.nonzero(self.data) return tuple(type(self)((dim), nz) for nz, dim in zip(nonzeros, self.dims)) - def _as_sparse(self, sparse_format: str = _default, fill_value=dtypes.NA): + def _as_sparse( + self: T_NamedArray, + sparse_format: str | Default = _default, + fill_value=dtypes.NA, + ) -> T_NamedArray: """ use sparse-array as backend. """ @@ -392,13 +417,13 @@ def _as_sparse(self, sparse_format: str = _default, fill_value=dtypes.NA): sparse_format = "coo" try: as_sparse = getattr(sparse, f"as_{sparse_format.lower()}") - except AttributeError: - raise ValueError(f"{sparse_format} is not a valid sparse format") + except AttributeError as exc: + raise ValueError(f"{sparse_format} is not a valid sparse format") from exc data = as_sparse(self.data.astype(dtype), fill_value=fill_value) return self._replace(data=data) - def _to_dense(self): + def _to_dense(self: T_NamedArray) -> T_NamedArray: """ Change backend from sparse to np.array """ diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py index 8c48e4acb33..204666bd520 100644 --- a/xarray/namedarray/utils.py +++ b/xarray/namedarray/utils.py @@ -56,7 +56,7 @@ def module_available(module: str) -> bool: return importlib.util.find_spec(module) is not None -def is_dask_collection(x): +def is_dask_collection(x: typing.Any) -> bool: if module_available("dask"): from dask.base import is_dask_collection @@ -78,7 +78,7 @@ def is_duck_array(value: typing.Any) -> bool: ) -def is_duck_dask_array(x): +def is_duck_dask_array(x: typing.Any) -> bool: return is_duck_array(x) and is_dask_collection(x) From c55f35acf118ccdbb82af851424c5778c538a55e Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 24 Aug 2023 15:48:56 -0700 Subject: [PATCH 28/67] add initial tests --- xarray/tests/test_namedarray.py | 83 +++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 xarray/tests/test_namedarray.py diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py new file mode 100644 index 00000000000..6da660d40a2 --- /dev/null +++ b/xarray/tests/test_namedarray.py @@ -0,0 +1,83 @@ +import numpy as np +import pytest + +from xarray.namedarray.core import NamedArray + + +@pytest.mark.parametrize( + "dims, data, attrs", [("x", [1, 2, 3], {"key": "value"}), ("y", [4, 5], None)] +) +def test_named_array_initialization(dims, data, attrs): + named_array = NamedArray(dims, data, attrs) + assert named_array.dims == (dims,) + assert np.array_equal(named_array.data, data) + assert named_array.attrs == (attrs or {}) + + +@pytest.mark.parametrize( + "dims, data, expected_ndim, expected_size, expected_dtype, expected_shape, expected_len", + [ + ("x", [1, 2, 3], 1, 3, np.dtype(int), (3,), 3), + (["x", "y"], [[1, 2], [3, 4]], 2, 4, np.dtype(int), (2, 2), 2), + ], +) +def test_named_array_properties( + dims, + data, + expected_ndim, + expected_size, + expected_dtype, + expected_shape, + expected_len, +): + named_array = NamedArray(dims, data) + expected_nbytes = expected_size * np.array(data).dtype.itemsize + assert named_array.ndim == expected_ndim + assert named_array.size == expected_size + assert named_array.dtype == expected_dtype + assert named_array.shape == expected_shape + assert named_array.nbytes == expected_nbytes + assert len(named_array) == expected_len + + +@pytest.mark.parametrize( + "initial_dims, initial_data, new_dims", + [ + ("x", [1, 2, 3], "y"), + (["x", "y"], [[1, 2], [3, 4]], ["a", "b"]), + ], +) +def test_named_array_dims_setter(initial_dims, initial_data, new_dims): + named_array = NamedArray(initial_dims, initial_data) + named_array.dims = new_dims + assert named_array.dims == tuple(new_dims) + + +@pytest.mark.parametrize( + "initial_dims, initial_data, new_attrs", + [ + ("x", [1, 2, 3], {"new_key": "new_value"}), + (["x", "y"], [[1, 2], [3, 4]], {"a": 1, "b": 2}), + # Edge case: empty attributes + ("x", [1, 2, 3], {}), + ], +) +def test_named_array_attrs_setter(initial_dims, initial_data, new_attrs): + named_array = NamedArray(initial_dims, initial_data) + named_array.attrs = new_attrs + assert named_array.attrs == new_attrs + + +@pytest.mark.parametrize( + "initial_dims, initial_data, new_data", + [ + ("x", [1, 2, 3], [4, 5, 6]), + (["x", "y"], [[1, 2], [3, 4]], [[4, 5], [6, 7]]), + # Edge case: setting data with the same values + ("x", [1, 2, 3], [1, 2, 3]), + ], +) +def test_named_array_data_setter(initial_dims, initial_data, new_data): + named_array = NamedArray(initial_dims, initial_data) + named_array.data = new_data + assert np.array_equal(named_array.data, new_data) From 34a262aa0144eb4392a3d853b1177d82ccf7f6c6 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com> Date: Thu, 31 Aug 2023 13:24:33 -0700 Subject: [PATCH 29/67] Apply suggestions from code review Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> --- xarray/namedarray/core.py | 8 ++++---- xarray/tests/test_namedarray.py | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 09f8d5f24c5..0127aad647b 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -26,7 +26,7 @@ # TODO: Add tests! -def as_compatible_data(data, fastpath: bool = False): +def as_compatible_data(data: T_DuckArray | np.typing.ArrayLike, fastpath: bool = False) -> T_DuckArray: if fastpath and getattr(data, "ndim", 0) > 0: # can't use fastpath (yet) for scalars return data @@ -58,7 +58,7 @@ class NamedArray: def __init__( self, dims: str | Iterable[Hashable], - data: T_DuckArray, + data: T_DuckArray | np.typing.ArrayLike, attrs: dict | None = None, ): self._data: T_DuckArray = as_compatible_data(data) @@ -271,7 +271,7 @@ def chunks(self: T_NamedArray) -> tuple[tuple[int, ...], ...] | None: @property def chunksizes( self: T_NamedArray, - ) -> typing.Mapping[typing.Any, tuple[int, ...]] | None: + ) -> typing.Mapping[typing.Any, tuple[int, ...]]: """ Mapping from dimension names to block lengths for this variable's data, or None if the underlying data is not a dask array. @@ -289,7 +289,7 @@ def chunksizes( if hasattr(self._data, "chunks"): return Frozen(dict(zip(self.dims, self.data.chunks))) else: - return None + return {} def _replace( self: T_NamedArray, dims=_default, data=_default, attrs=_default diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index 6da660d40a2..8f099c745fe 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -7,7 +7,7 @@ @pytest.mark.parametrize( "dims, data, attrs", [("x", [1, 2, 3], {"key": "value"}), ("y", [4, 5], None)] ) -def test_named_array_initialization(dims, data, attrs): +def test_named_array_initialization(dims, data, attrs) -> None: named_array = NamedArray(dims, data, attrs) assert named_array.dims == (dims,) assert np.array_equal(named_array.data, data) @@ -29,7 +29,7 @@ def test_named_array_properties( expected_dtype, expected_shape, expected_len, -): +) -> None: named_array = NamedArray(dims, data) expected_nbytes = expected_size * np.array(data).dtype.itemsize assert named_array.ndim == expected_ndim @@ -47,7 +47,7 @@ def test_named_array_properties( (["x", "y"], [[1, 2], [3, 4]], ["a", "b"]), ], ) -def test_named_array_dims_setter(initial_dims, initial_data, new_dims): +def test_named_array_dims_setter(initial_dims, initial_data, new_dims) -> None: named_array = NamedArray(initial_dims, initial_data) named_array.dims = new_dims assert named_array.dims == tuple(new_dims) @@ -62,7 +62,7 @@ def test_named_array_dims_setter(initial_dims, initial_data, new_dims): ("x", [1, 2, 3], {}), ], ) -def test_named_array_attrs_setter(initial_dims, initial_data, new_attrs): +def test_named_array_attrs_setter(initial_dims, initial_data, new_attrs) -> None: named_array = NamedArray(initial_dims, initial_data) named_array.attrs = new_attrs assert named_array.attrs == new_attrs @@ -77,7 +77,7 @@ def test_named_array_attrs_setter(initial_dims, initial_data, new_attrs): ("x", [1, 2, 3], [1, 2, 3]), ], ) -def test_named_array_data_setter(initial_dims, initial_data, new_data): +def test_named_array_data_setter(initial_dims, initial_data, new_data) -> None: named_array = NamedArray(initial_dims, initial_data) named_array.data = new_data assert np.array_equal(named_array.data, new_data) From 790bfc2c4412262e3010a0b5cb51652592d79f70 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 31 Aug 2023 20:25:50 +0000 Subject: [PATCH 30/67] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/namedarray/core.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 0127aad647b..4f4f5b8745f 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -26,7 +26,9 @@ # TODO: Add tests! -def as_compatible_data(data: T_DuckArray | np.typing.ArrayLike, fastpath: bool = False) -> T_DuckArray: +def as_compatible_data( + data: T_DuckArray | np.typing.ArrayLike, fastpath: bool = False +) -> T_DuckArray: if fastpath and getattr(data, "ndim", 0) > 0: # can't use fastpath (yet) for scalars return data From a31da0022cd368b230c2465cf56a71e2cd3b0f4c Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 11 Sep 2023 21:24:34 +0200 Subject: [PATCH 31/67] attempt to fix some mypy errors --- xarray/namedarray/core.py | 10 +++++----- xarray/namedarray/utils.py | 14 +++++++++++++- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 4f4f5b8745f..24578072df8 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -18,11 +18,11 @@ to_0d_object_array, ) -# temporary placeholder for indicating an array api compliant type. -# hopefully in the future we can narrow this down more -T_DuckArray = typing.TypeVar("T_DuckArray", bound=typing.Any) +if typing.TYPE_CHECKING: -T_NamedArray = typing.TypeVar("T_NamedArray", bound="NamedArray") + from xarray.namedarray.util import T_DuckArray + + T_NamedArray = typing.TypeVar("T_NamedArray", bound="NamedArray") # TODO: Add tests! @@ -31,7 +31,7 @@ def as_compatible_data( ) -> T_DuckArray: if fastpath and getattr(data, "ndim", 0) > 0: # can't use fastpath (yet) for scalars - return data + return typing.cast(T_DuckArray, data) # TODO : check scalar if is_duck_array(data): diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py index 204666bd520..2c3800f71bf 100644 --- a/xarray/namedarray/utils.py +++ b/xarray/namedarray/utils.py @@ -1,11 +1,23 @@ from __future__ import annotations import importlib +import sys import typing from collections.abc import Iterator, Mapping import numpy as np + +if typing.TYPE_CHECKING: + if sys.version_info >= (3, 10): + from typing import TypeGuard + else: + from typing_extensions import TypeGuard + + # temporary placeholder for indicating an array api compliant type. + # hopefully in the future we can narrow this down more + T_DuckArray = typing.TypeVar("T_DuckArray", bound=typing.Any) + K = typing.TypeVar("K") V = typing.TypeVar("V") T = typing.TypeVar("T") @@ -64,7 +76,7 @@ def is_dask_collection(x: typing.Any) -> bool: return False -def is_duck_array(value: typing.Any) -> bool: +def is_duck_array(value: typing.Any) -> TypeGuard[T_DuckArray]: if isinstance(value, np.ndarray): return True return ( From b6c0af5824814fb0d8a8d48494d5e392fcb2e76a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 11 Sep 2023 19:25:16 +0000 Subject: [PATCH 32/67] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/namedarray/core.py | 1 - xarray/namedarray/utils.py | 1 - 2 files changed, 2 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 24578072df8..0bc70380911 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -19,7 +19,6 @@ ) if typing.TYPE_CHECKING: - from xarray.namedarray.util import T_DuckArray T_NamedArray = typing.TypeVar("T_NamedArray", bound="NamedArray") diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py index 2c3800f71bf..225bf4a3e0b 100644 --- a/xarray/namedarray/utils.py +++ b/xarray/namedarray/utils.py @@ -7,7 +7,6 @@ import numpy as np - if typing.TYPE_CHECKING: if sys.version_info >= (3, 10): from typing import TypeGuard From b1e42aaaa85b21dd2cd77752d805fef53d308b63 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 11 Sep 2023 21:29:37 +0200 Subject: [PATCH 33/67] Update core.py --- xarray/namedarray/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 24578072df8..16c8359396f 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -20,7 +20,7 @@ if typing.TYPE_CHECKING: - from xarray.namedarray.util import T_DuckArray + from xarray.namedarray.utils import T_DuckArray T_NamedArray = typing.TypeVar("T_NamedArray", bound="NamedArray") From 2661001b9a20e6007e80d8eefbcca37e270540c3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 11 Sep 2023 19:31:17 +0000 Subject: [PATCH 34/67] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/namedarray/core.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 16c8359396f..3a752fb8b03 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -19,7 +19,6 @@ ) if typing.TYPE_CHECKING: - from xarray.namedarray.utils import T_DuckArray T_NamedArray = typing.TypeVar("T_NamedArray", bound="NamedArray") From b2a1cda7a3f5508a829a5eeb6eb497615ab7be44 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 11 Sep 2023 21:38:02 +0200 Subject: [PATCH 35/67] Update core.py --- xarray/namedarray/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 16c8359396f..cd7fd6a6941 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -46,12 +46,12 @@ def as_compatible_data( if not isinstance(data, np.ndarray) and ( hasattr(data, "__array_function__") or hasattr(data, "__array_namespace__") ): - return data + return typing.cast(T_DuckArray, data) if isinstance(data, tuple): data = to_0d_object_array(data) # validate whether the data is valid data types. - return np.asarray(data) + return typing.cast(T_DuckArray, np.asarray(data)) class NamedArray: From b25a8ff83f47a2cc9277925715a5f56ceef091a2 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 11 Sep 2023 21:54:41 +0200 Subject: [PATCH 36/67] All input data can be arraylike --- xarray/namedarray/core.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 19a5cfd66d5..91691d8ace3 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -22,12 +22,11 @@ from xarray.namedarray.utils import T_DuckArray T_NamedArray = typing.TypeVar("T_NamedArray", bound="NamedArray") + T_InputData = typing.Union[T_DuckArray, np.typing.ArrayLike] # TODO: Add tests! -def as_compatible_data( - data: T_DuckArray | np.typing.ArrayLike, fastpath: bool = False -) -> T_DuckArray: +def as_compatible_data(data: T_InputData, fastpath: bool = False) -> T_DuckArray: if fastpath and getattr(data, "ndim", 0) > 0: # can't use fastpath (yet) for scalars return typing.cast(T_DuckArray, data) @@ -40,7 +39,8 @@ def as_compatible_data( if isinstance(data, np.ma.MaskedArray): raise ValueError if isinstance(data, ExplicitlyIndexed): - return data + # TODO: better that is_duck_array(ExplicitlyIndexed) -> True + return typing.cast(T_DuckArray, data) if not isinstance(data, np.ndarray) and ( hasattr(data, "__array_function__") or hasattr(data, "__array_namespace__") @@ -59,7 +59,7 @@ class NamedArray: def __init__( self, dims: str | Iterable[Hashable], - data: T_DuckArray | np.typing.ArrayLike, + data: T_InputData, attrs: dict | None = None, ): self._data: T_DuckArray = as_compatible_data(data) @@ -186,7 +186,7 @@ def data(self: T_NamedArray): return self._data @data.setter - def data(self: T_NamedArray, data: T_DuckArray) -> None: + def data(self: T_NamedArray, data: T_InputData) -> None: data = as_compatible_data(data) self._check_shape(data) self._data = data @@ -306,7 +306,7 @@ def _replace( def _copy( self: T_NamedArray, deep: bool = True, - data: T_DuckArray | None = None, + data: T_InputData | None = None, memo: dict[int, typing.Any] | None = None, ) -> T_NamedArray: if data is None: @@ -332,7 +332,7 @@ def __deepcopy__( return self._copy(deep=True, memo=memo) def copy( - self: T_NamedArray, deep: bool = True, data: T_DuckArray | None = None + self: T_NamedArray, deep: bool = True, data: T_InputData | None = None ) -> T_NamedArray: """Returns a copy of this object. From 06d77ade11b3251e200d088e13fdf1cecd622cd5 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 11 Sep 2023 23:12:43 +0200 Subject: [PATCH 37/67] Update core.py --- xarray/namedarray/core.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 91691d8ace3..08d3588cde7 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -22,11 +22,12 @@ from xarray.namedarray.utils import T_DuckArray T_NamedArray = typing.TypeVar("T_NamedArray", bound="NamedArray") - T_InputData = typing.Union[T_DuckArray, np.typing.ArrayLike] # TODO: Add tests! -def as_compatible_data(data: T_InputData, fastpath: bool = False) -> T_DuckArray: +def as_compatible_data( + data: T_DuckArray | np.typing.ArrayLike, fastpath: bool = False +) -> T_DuckArray: if fastpath and getattr(data, "ndim", 0) > 0: # can't use fastpath (yet) for scalars return typing.cast(T_DuckArray, data) @@ -59,7 +60,7 @@ class NamedArray: def __init__( self, dims: str | Iterable[Hashable], - data: T_InputData, + data: T_DuckArray | np.typing.ArrayLike, attrs: dict | None = None, ): self._data: T_DuckArray = as_compatible_data(data) @@ -186,7 +187,7 @@ def data(self: T_NamedArray): return self._data @data.setter - def data(self: T_NamedArray, data: T_InputData) -> None: + def data(self: T_NamedArray, data: T_DuckArray | np.typing.ArrayLike) -> None: data = as_compatible_data(data) self._check_shape(data) self._data = data @@ -306,7 +307,7 @@ def _replace( def _copy( self: T_NamedArray, deep: bool = True, - data: T_InputData | None = None, + data: T_DuckArray | np.typing.ArrayLike | None = None, memo: dict[int, typing.Any] | None = None, ) -> T_NamedArray: if data is None: @@ -332,7 +333,9 @@ def __deepcopy__( return self._copy(deep=True, memo=memo) def copy( - self: T_NamedArray, deep: bool = True, data: T_InputData | None = None + self: T_NamedArray, + deep: bool = True, + data: T_DuckArray | np.typing.ArrayLike | None = None, ) -> T_NamedArray: """Returns a copy of this object. From 96ac4ec3f8c7be27b2e8ec9cf4cf3f613d17c7c0 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 11 Sep 2023 23:42:34 +0200 Subject: [PATCH 38/67] Update core.py --- xarray/namedarray/core.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 08d3588cde7..bce0e1042c8 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -22,6 +22,8 @@ from xarray.namedarray.utils import T_DuckArray T_NamedArray = typing.TypeVar("T_NamedArray", bound="NamedArray") + DimsInput = typing.Union[str, Iterable[Hashable]] + Dims = tuple[Hashable, ...] # TODO: Add tests! @@ -43,10 +45,10 @@ def as_compatible_data( # TODO: better that is_duck_array(ExplicitlyIndexed) -> True return typing.cast(T_DuckArray, data) - if not isinstance(data, np.ndarray) and ( - hasattr(data, "__array_function__") or hasattr(data, "__array_namespace__") - ): - return typing.cast(T_DuckArray, data) + # if not isinstance(data, np.ndarray) and ( + # hasattr(data, "__array_function__") or hasattr(data, "__array_namespace__") + # ): + # return typing.cast(T_DuckArray, data) if isinstance(data, tuple): data = to_0d_object_array(data) @@ -59,12 +61,12 @@ class NamedArray: def __init__( self, - dims: str | Iterable[Hashable], + dims: DimsInput, data: T_DuckArray | np.typing.ArrayLike, attrs: dict | None = None, ): self._data: T_DuckArray = as_compatible_data(data) - self._dims: tuple[Hashable, ...] = self._parse_dimensions(dims) + self._dims: Dims = self._parse_dimensions(dims) self._attrs: dict | None = dict(attrs) if attrs else None @property @@ -134,17 +136,15 @@ def nbytes(self: T_NamedArray) -> int: return self.size * self.dtype.itemsize @property - def dims(self: T_NamedArray) -> tuple[Hashable, ...]: + def dims(self: T_NamedArray) -> Dims: """Tuple of dimension names with which this variable is associated.""" return self._dims @dims.setter - def dims(self: T_NamedArray, value: str | Iterable[Hashable]) -> None: + def dims(self: T_NamedArray, value: DimsInput) -> None: self._dims = self._parse_dimensions(value) - def _parse_dimensions( - self: T_NamedArray, dims: str | Iterable[Hashable] - ) -> tuple[Hashable, ...]: + def _parse_dimensions(self: T_NamedArray, dims: DimsInput) -> Dims: dims = (dims,) if isinstance(dims, str) else tuple(dims) if len(dims) != self.ndim: raise ValueError( @@ -399,7 +399,7 @@ def _nonzero(self: T_NamedArray) -> tuple[T_NamedArray, ...]: # TODO we should replace dask's native nonzero # after https://github.com/dask/dask/issues/1076 is implemented. nonzeros = np.nonzero(self.data) - return tuple(type(self)((dim), nz) for nz, dim in zip(nonzeros, self.dims)) + return tuple(type(self)((dim,), nz) for nz, dim in zip(nonzeros, self.dims)) def _as_sparse( self: T_NamedArray, From 760cb483e20cd64849ec9b36ea99d299be4897ac Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 12 Sep 2023 00:11:43 +0200 Subject: [PATCH 39/67] get and set attrs at the same level. --- xarray/core/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index a96d1f34c98..c73c1360435 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -7481,7 +7481,7 @@ def _unary_op(self: T_Dataset, f, *args, **kwargs) -> T_Dataset: else: variables[k] = f(v, *args, **kwargs) if keep_attrs: - variables[k].attrs = v._attrs + variables[k]._attrs = v._attrs attrs = self._attrs if keep_attrs else None return self._replace_with_new_dims(variables, attrs=attrs) From 15c73001522aa5654ac2fd701f33604af1f47bb3 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 12 Sep 2023 00:12:12 +0200 Subject: [PATCH 40/67] data doesn't have to be ndarray --- xarray/core/parallelcompat.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/xarray/core/parallelcompat.py b/xarray/core/parallelcompat.py index 26efc5fc412..26115787601 100644 --- a/xarray/core/parallelcompat.py +++ b/xarray/core/parallelcompat.py @@ -256,9 +256,7 @@ def normalize_chunks( raise NotImplementedError() @abstractmethod - def from_array( - self, data: np.ndarray, chunks: T_Chunks, **kwargs - ) -> T_ChunkedArray: + def from_array(self, data: Any, chunks: T_Chunks, **kwargs) -> T_ChunkedArray: """ Create a chunked array from a non-chunked numpy-like array. From bbe3db4f48a20f0f6e5ecebd9094312da35cbfda Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 12 Sep 2023 00:13:01 +0200 Subject: [PATCH 41/67] avoid redefining typing use new variable names instead --- xarray/core/variable.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 6f7440c4df3..bc1e526f4c8 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -883,11 +883,13 @@ def _copy( memo: dict[int, Any] | None = None, ) -> T_Variable: if data is None: - ndata = self._data + data_old = self._data - if isinstance(ndata, indexing.MemoryCachedArray): + if isinstance(data_old, indexing.MemoryCachedArray): # don't share caching between copies - ndata = indexing.MemoryCachedArray(ndata.array) + ndata = indexing.MemoryCachedArray(data_old.array) + else: + ndata = data_old if deep: ndata = copy.deepcopy(ndata, memo) @@ -1021,11 +1023,11 @@ def chunk( inline_array=inline_array, ) - data = self._data - if chunkmanager.is_chunked_array(data): - data = chunkmanager.rechunk(data, chunks) # type: ignore[arg-type] + data_old = self._data + if chunkmanager.is_chunked_array(data_old): + data_chunked = chunkmanager.rechunk(data_old, chunks) # type: ignore[arg-type] else: - if isinstance(data, indexing.ExplicitlyIndexed): + if isinstance(data_old, indexing.ExplicitlyIndexed): # Unambiguously handle array storage backends (like NetCDF4 and h5py) # that can't handle general array indexing. For example, in netCDF4 you # can do "outer" indexing along two dimensions independent, which works @@ -1034,20 +1036,22 @@ def chunk( # Using OuterIndexer is a pragmatic choice: dask does not yet handle # different indexing types in an explicit way: # https://github.com/dask/dask/issues/2883 - data = indexing.ImplicitToExplicitIndexingAdapter( - data, indexing.OuterIndexer + ndata = indexing.ImplicitToExplicitIndexingAdapter( + data_old, indexing.OuterIndexer ) + else: + ndata = data_old if utils.is_dict_like(chunks): - chunks = tuple(chunks.get(n, s) for n, s in enumerate(data.shape)) + chunks = tuple(chunks.get(n, s) for n, s in enumerate(ndata.shape)) - data = chunkmanager.from_array( - data, + data_chunked = chunkmanager.from_array( + ndata, chunks, # type: ignore[arg-type] **_from_array_kwargs, ) - return self._replace(data=data) + return self._replace(data=data_chunked) def to_numpy(self) -> np.ndarray: """Coerces wrapped data to numpy and returns a numpy.ndarray""" From 22336629902863ac74dd6cf5a6edef2ee390b8ae Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 12 Sep 2023 00:18:30 +0200 Subject: [PATCH 42/67] import on runtime as well to be able to cast --- xarray/namedarray/core.py | 3 +-- xarray/namedarray/utils.py | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index bce0e1042c8..18ff9d1d5d5 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -13,14 +13,13 @@ from xarray.core.utils import Default, _default from xarray.namedarray.utils import ( Frozen, + T_DuckArray, is_duck_array, is_duck_dask_array, to_0d_object_array, ) if typing.TYPE_CHECKING: - from xarray.namedarray.utils import T_DuckArray - T_NamedArray = typing.TypeVar("T_NamedArray", bound="NamedArray") DimsInput = typing.Union[str, Iterable[Hashable]] Dims = tuple[Hashable, ...] diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py index 225bf4a3e0b..e75d1a0cfdb 100644 --- a/xarray/namedarray/utils.py +++ b/xarray/namedarray/utils.py @@ -13,9 +13,9 @@ else: from typing_extensions import TypeGuard - # temporary placeholder for indicating an array api compliant type. - # hopefully in the future we can narrow this down more - T_DuckArray = typing.TypeVar("T_DuckArray", bound=typing.Any) +# temporary placeholder for indicating an array api compliant type. +# hopefully in the future we can narrow this down more +T_DuckArray = typing.TypeVar("T_DuckArray", bound=typing.Any) K = typing.TypeVar("K") V = typing.TypeVar("V") From fb2ca4daa6957236dbe33a07e8eb63652b6695c3 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 12 Sep 2023 00:35:53 +0200 Subject: [PATCH 43/67] requires ufunc and function to be a valid duck array --- xarray/tests/test_formatting.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/xarray/tests/test_formatting.py b/xarray/tests/test_formatting.py index 7670b77322c..2361184a370 100644 --- a/xarray/tests/test_formatting.py +++ b/xarray/tests/test_formatting.py @@ -552,6 +552,9 @@ def _repr_inline_(self, width): def __array_function__(self, *args, **kwargs): return NotImplemented + def __array_ufunc__(self, *args, **kwargs): + return NotImplemented + @property def shape(self) -> tuple[int, ...]: return self.value.shape From cf91823a46b6e9112aba51b9d3446cc7a10822cc Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Fri, 15 Sep 2023 16:16:42 +0200 Subject: [PATCH 44/67] Add array_namespace --- xarray/tests/test_dataset.py | 3 +++ xarray/tests/test_formatting.py | 5 +---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 882285ac8ec..99ef4fcb3d1 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -415,6 +415,9 @@ def __init__(self): def __array_function__(self, *args, **kwargs): pass + def __array_namespace__(self, *args, **kwargs): + pass + def __repr__(self): return "Custom\nArray" diff --git a/xarray/tests/test_formatting.py b/xarray/tests/test_formatting.py index 2361184a370..5ca134503e8 100644 --- a/xarray/tests/test_formatting.py +++ b/xarray/tests/test_formatting.py @@ -549,10 +549,7 @@ def _repr_inline_(self, width): return formatted - def __array_function__(self, *args, **kwargs): - return NotImplemented - - def __array_ufunc__(self, *args, **kwargs): + def __array_namespace__(self, *args, **kwargs): return NotImplemented @property From f21297b4c4caa9ec8d0d38c3ffce688c54da5190 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Fri, 15 Sep 2023 19:36:37 +0200 Subject: [PATCH 45/67] Update test_dataset.py --- xarray/tests/test_dataset.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 99ef4fcb3d1..f1947adc1a7 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -410,6 +410,7 @@ def test_repr_nep18(self) -> None: class Array: def __init__(self): self.shape = (2,) + self.ndim = 1 self.dtype = np.dtype(np.float64) def __array_function__(self, *args, **kwargs): From 4fafb0256cc7f62552852ef08bee79afab2bea55 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Fri, 15 Sep 2023 19:37:44 +0200 Subject: [PATCH 46/67] Update test_dataset.py --- xarray/tests/test_dataset.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index f1947adc1a7..0df50b70d2e 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -414,10 +414,10 @@ def __init__(self): self.dtype = np.dtype(np.float64) def __array_function__(self, *args, **kwargs): - pass + return NotImplemented - def __array_namespace__(self, *args, **kwargs): - pass + def __array_ufunc__(self, *args, **kwargs): + return NotImplemented def __repr__(self): return "Custom\nArray" From c5fb91dea5b4629be2914924f682e25688d05e0c Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 15 Sep 2023 11:46:47 -0700 Subject: [PATCH 47/67] remove Frozen --- xarray/namedarray/core.py | 3 +-- xarray/namedarray/utils.py | 28 ---------------------------- 2 files changed, 1 insertion(+), 30 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 18ff9d1d5d5..91b1ea396d4 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -12,7 +12,6 @@ from xarray.core.indexing import ExplicitlyIndexed from xarray.core.utils import Default, _default from xarray.namedarray.utils import ( - Frozen, T_DuckArray, is_duck_array, is_duck_dask_array, @@ -288,7 +287,7 @@ def chunksizes( xarray.unify_chunks """ if hasattr(self._data, "chunks"): - return Frozen(dict(zip(self.dims, self.data.chunks))) + return dict(zip(self.dims, self.data.chunks)) else: return {} diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py index e75d1a0cfdb..40d91a16aa3 100644 --- a/xarray/namedarray/utils.py +++ b/xarray/namedarray/utils.py @@ -3,7 +3,6 @@ import importlib import sys import typing -from collections.abc import Iterator, Mapping import numpy as np @@ -22,33 +21,6 @@ T = typing.TypeVar("T") -class Frozen(Mapping[K, V]): - """Wrapper around an object implementing the mapping interface to make it - immutable. If you really want to modify the mapping, the mutable version is - saved under the `mapping` attribute. - """ - - __slots__ = ("mapping",) - - def __init__(self, mapping: Mapping[K, V]): - self.mapping = mapping - - def __getitem__(self, key: K) -> V: - return self.mapping[key] - - def __iter__(self) -> Iterator[K]: - return iter(self.mapping) - - def __len__(self) -> int: - return len(self.mapping) - - def __contains__(self, key: object) -> bool: - return key in self.mapping - - def __repr__(self) -> str: - return f"{type(self).__name__}({self.mapping!r})" - - def module_available(module: str) -> bool: """Checks whether a module is installed without importing it. From 4708ca2feb30208157d1ba51f7931dac79c1b6c1 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 19 Sep 2023 14:21:03 -0700 Subject: [PATCH 48/67] update tests --- xarray/namedarray/core.py | 4 --- xarray/tests/test_namedarray.py | 48 ++++++++++++++++++++++----------- 2 files changed, 33 insertions(+), 19 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 91b1ea396d4..ca1f0101e64 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -43,10 +43,6 @@ def as_compatible_data( # TODO: better that is_duck_array(ExplicitlyIndexed) -> True return typing.cast(T_DuckArray, data) - # if not isinstance(data, np.ndarray) and ( - # hasattr(data, "__array_function__") or hasattr(data, "__array_namespace__") - # ): - # return typing.cast(T_DuckArray, data) if isinstance(data, tuple): data = to_0d_object_array(data) diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index 8f099c745fe..7d24552c6c2 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -1,13 +1,14 @@ import numpy as np import pytest -from xarray.namedarray.core import NamedArray +from xarray.namedarray.core import NamedArray, as_compatible_data +from xarray.namedarray.utils import T_DuckArray @pytest.mark.parametrize( "dims, data, attrs", [("x", [1, 2, 3], {"key": "value"}), ("y", [4, 5], None)] ) -def test_named_array_initialization(dims, data, attrs) -> None: +def test_named_array_initialization(dims: str, data: T_DuckArray, attrs: dict) -> None: named_array = NamedArray(dims, data, attrs) assert named_array.dims == (dims,) assert np.array_equal(named_array.data, data) @@ -22,13 +23,13 @@ def test_named_array_initialization(dims, data, attrs) -> None: ], ) def test_named_array_properties( - dims, - data, - expected_ndim, - expected_size, - expected_dtype, - expected_shape, - expected_len, + dims: str, + data: T_DuckArray, + expected_ndim: int, + expected_size: int, + expected_dtype: np.dtype, + expected_shape: tuple, + expected_len: int, ) -> None: named_array = NamedArray(dims, data) expected_nbytes = expected_size * np.array(data).dtype.itemsize @@ -47,7 +48,9 @@ def test_named_array_properties( (["x", "y"], [[1, 2], [3, 4]], ["a", "b"]), ], ) -def test_named_array_dims_setter(initial_dims, initial_data, new_dims) -> None: +def test_named_array_dims_setter( + initial_dims: str, initial_data: T_DuckArray, new_dims: str +) -> None: named_array = NamedArray(initial_dims, initial_data) named_array.dims = new_dims assert named_array.dims == tuple(new_dims) @@ -58,11 +61,12 @@ def test_named_array_dims_setter(initial_dims, initial_data, new_dims) -> None: [ ("x", [1, 2, 3], {"new_key": "new_value"}), (["x", "y"], [[1, 2], [3, 4]], {"a": 1, "b": 2}), - # Edge case: empty attributes ("x", [1, 2, 3], {}), ], ) -def test_named_array_attrs_setter(initial_dims, initial_data, new_attrs) -> None: +def test_named_array_attrs_setter( + initial_dims: str, initial_data: T_DuckArray, new_attrs: dict +) -> None: named_array = NamedArray(initial_dims, initial_data) named_array.attrs = new_attrs assert named_array.attrs == new_attrs @@ -73,11 +77,25 @@ def test_named_array_attrs_setter(initial_dims, initial_data, new_attrs) -> None [ ("x", [1, 2, 3], [4, 5, 6]), (["x", "y"], [[1, 2], [3, 4]], [[4, 5], [6, 7]]), - # Edge case: setting data with the same values - ("x", [1, 2, 3], [1, 2, 3]), ], ) -def test_named_array_data_setter(initial_dims, initial_data, new_data) -> None: +def test_named_array_data_setter( + initial_dims: str, initial_data: T_DuckArray, new_data: T_DuckArray +) -> None: named_array = NamedArray(initial_dims, initial_data) named_array.data = new_data assert np.array_equal(named_array.data, new_data) + + +@pytest.mark.parametrize( + "input_data, expected_output", + [ + ([1, 2, 3], np.array([1, 2, 3])), + (np.array([4, 5, 6]), np.array([4, 5, 6])), + ], +) +def test_as_compatible_data( + input_data: T_DuckArray, expected_output: T_DuckArray +) -> None: + output = as_compatible_data(input_data) + assert np.array_equal(output, expected_output) From ff1b4de5d6b1c703c5fd4e02389dc489bad8d911 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 19 Sep 2023 21:32:39 -0700 Subject: [PATCH 49/67] update tests --- xarray/namedarray/core.py | 12 +++ xarray/tests/test_namedarray.py | 183 ++++++++++++++++++-------------- 2 files changed, 114 insertions(+), 81 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index ca1f0101e64..4d1731ee776 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -287,6 +287,18 @@ def chunksizes( else: return {} + @property + def sizes(self: typing.Any) -> dict[Hashable, int]: + """Ordered mapping from dimension names to lengths. + + Immutable. + + See Also + -------- + Dataset.sizes + """ + return dict(zip(self.dims, self.shape)) + def _replace( self: T_NamedArray, dims=_default, data=_default, attrs=_default ) -> T_NamedArray: diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index 7d24552c6c2..fb56388d84f 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -1,3 +1,5 @@ +from abc import ABC, abstractclassmethod + import numpy as np import pytest @@ -6,96 +8,115 @@ @pytest.mark.parametrize( - "dims, data, attrs", [("x", [1, 2, 3], {"key": "value"}), ("y", [4, 5], None)] -) -def test_named_array_initialization(dims: str, data: T_DuckArray, attrs: dict) -> None: - named_array = NamedArray(dims, data, attrs) - assert named_array.dims == (dims,) - assert np.array_equal(named_array.data, data) - assert named_array.attrs == (attrs or {}) - - -@pytest.mark.parametrize( - "dims, data, expected_ndim, expected_size, expected_dtype, expected_shape, expected_len", + "input_data, expected_output", [ - ("x", [1, 2, 3], 1, 3, np.dtype(int), (3,), 3), - (["x", "y"], [[1, 2], [3, 4]], 2, 4, np.dtype(int), (2, 2), 2), + ([1, 2, 3], np.array([1, 2, 3])), + (np.array([4, 5, 6]), np.array([4, 5, 6])), ], ) -def test_named_array_properties( - dims: str, - data: T_DuckArray, - expected_ndim: int, - expected_size: int, - expected_dtype: np.dtype, - expected_shape: tuple, - expected_len: int, +def test_as_compatible_data( + input_data: T_DuckArray, expected_output: T_DuckArray ) -> None: - named_array = NamedArray(dims, data) - expected_nbytes = expected_size * np.array(data).dtype.itemsize - assert named_array.ndim == expected_ndim - assert named_array.size == expected_size - assert named_array.dtype == expected_dtype - assert named_array.shape == expected_shape - assert named_array.nbytes == expected_nbytes - assert len(named_array) == expected_len + output = as_compatible_data(input_data) + assert np.array_equal(output, expected_output) -@pytest.mark.parametrize( - "initial_dims, initial_data, new_dims", - [ - ("x", [1, 2, 3], "y"), - (["x", "y"], [[1, 2], [3, 4]], ["a", "b"]), - ], -) -def test_named_array_dims_setter( - initial_dims: str, initial_data: T_DuckArray, new_dims: str -) -> None: - named_array = NamedArray(initial_dims, initial_data) - named_array.dims = new_dims - assert named_array.dims == tuple(new_dims) +class NamedArraySubclassObjects(ABC): + @abstractclassmethod + def cls(self, *args, **kwargs) -> NamedArray: + raise NotImplementedError + def test_properties(self): + data = 0.5 * np.arange(10).reshape(2, 5) + named_array = self.cls(["x", "y"], data, {"key": "value"}) + assert named_array.dims == ("x", "y") + assert np.array_equal(named_array.data, data) + assert named_array.attrs == {"key": "value"} + assert named_array.ndim == 2 + assert named_array.sizes == {"x": 2, "y": 5} + assert named_array.size == 10 + assert named_array.nbytes == 80 + assert len(named_array) == 2 -@pytest.mark.parametrize( - "initial_dims, initial_data, new_attrs", - [ - ("x", [1, 2, 3], {"new_key": "new_value"}), - (["x", "y"], [[1, 2], [3, 4]], {"a": 1, "b": 2}), - ("x", [1, 2, 3], {}), - ], -) -def test_named_array_attrs_setter( - initial_dims: str, initial_data: T_DuckArray, new_attrs: dict -) -> None: - named_array = NamedArray(initial_dims, initial_data) - named_array.attrs = new_attrs - assert named_array.attrs == new_attrs + def test_attrs(self): + named_array = self.cls(["x", "y"], np.arange(10).reshape(2, 5)) + assert named_array.attrs == {} + named_array.attrs["key"] = "value" + assert named_array.attrs == {"key": "value"} + named_array.attrs = {"key": "value2"} + assert named_array.attrs == {"key": "value2"} -@pytest.mark.parametrize( - "initial_dims, initial_data, new_data", - [ - ("x", [1, 2, 3], [4, 5, 6]), - (["x", "y"], [[1, 2], [3, 4]], [[4, 5], [6, 7]]), - ], -) -def test_named_array_data_setter( - initial_dims: str, initial_data: T_DuckArray, new_data: T_DuckArray -) -> None: - named_array = NamedArray(initial_dims, initial_data) - named_array.data = new_data - assert np.array_equal(named_array.data, new_data) +class TestNamedArray(NamedArraySubclassObjects): + def cls(self, *args, **kwargs) -> NamedArray: + return NamedArray(*args, **kwargs) + @pytest.fixture(autouse=True) + def setup(self): + self.inputs = np.random.random((3, 4, 5)).astype(np.float32) -@pytest.mark.parametrize( - "input_data, expected_output", - [ - ([1, 2, 3], np.array([1, 2, 3])), - (np.array([4, 5, 6]), np.array([4, 5, 6])), - ], -) -def test_as_compatible_data( - input_data: T_DuckArray, expected_output: T_DuckArray -) -> None: - output = as_compatible_data(input_data) - assert np.array_equal(output, expected_output) + def test_data(self): + named_array = NamedArray(["x", "y", "z"], self.inputs) + assert np.array_equal(named_array.data, self.inputs) + with pytest.raises(ValueError): + named_array.data = np.random.random((3, 4)).astype(np.float64) + + @pytest.mark.parametrize( + "data, dtype", + [ + ("foo", np.dtype("U3")), + (np.bytes_("foo"), np.dtype("S3")), + ], + ) + def test_0d_string(self, data, dtype): + named_array = NamedArray([], data) + assert named_array.data == data + assert named_array.dims == () + assert named_array.sizes == {} + assert named_array.attrs == {} + assert named_array.ndim == 0 + assert named_array.size == 1 + assert named_array.dtype == dtype + + def test_0d_datetime(self): + named_array = NamedArray([], np.datetime64("2000-01-01")) + assert named_array.dtype == np.dtype("datetime64[D]") + + @pytest.mark.parametrize( + "timedelta, expected_dtype", + [ + (np.timedelta64(1, "D"), np.dtype("timedelta64[D]")), + (np.timedelta64(1, "s"), np.dtype("timedelta64[s]")), + (np.timedelta64(1, "m"), np.dtype("timedelta64[m]")), + (np.timedelta64(1, "h"), np.dtype("timedelta64[h]")), + (np.timedelta64(1, "us"), np.dtype("timedelta64[us]")), + (np.timedelta64(1, "ns"), np.dtype("timedelta64[ns]")), + (np.timedelta64(1, "ps"), np.dtype("timedelta64[ps]")), + (np.timedelta64(1, "fs"), np.dtype("timedelta64[fs]")), + (np.timedelta64(1, "as"), np.dtype("timedelta64[as]")), + ], + ) + def test_0d_timedelta(self, timedelta, expected_dtype): + named_array = NamedArray([], timedelta) + assert named_array.dtype == expected_dtype + assert named_array.data == timedelta + + @pytest.mark.parametrize( + "dims, data_shape, new_dims, raises", + [ + (["x", "y", "z"], (2, 3, 4), ["a", "b", "c"], False), + (["x", "y", "z"], (2, 3, 4), ["a", "b"], True), + (["x", "y", "z"], (2, 4, 5), ["a", "b", "c", "d"], True), + ([], [], (), False), + ([], [], ("x",), True), + ], + ) + def test_dims_setter(self, dims, data_shape, new_dims, raises): + named_array = NamedArray(dims, np.random.random(data_shape)) + assert named_array.dims == tuple(dims) + if raises: + with pytest.raises(ValueError): + named_array.dims = new_dims + else: + named_array.dims = new_dims + assert named_array.dims == tuple(new_dims) From 216206320efca2630400a0f339a780282352e671 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 19 Sep 2023 22:32:14 -0700 Subject: [PATCH 50/67] switch to functional API --- xarray/tests/test_namedarray.py | 200 ++++++++++++++++---------------- 1 file changed, 97 insertions(+), 103 deletions(-) diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index fb56388d84f..430dcf068a6 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -1,5 +1,3 @@ -from abc import ABC, abstractclassmethod - import numpy as np import pytest @@ -7,6 +5,11 @@ from xarray.namedarray.utils import T_DuckArray +@pytest.fixture +def random_inputs(): + return np.random.random((3, 4, 5)).astype(np.float32) + + @pytest.mark.parametrize( "input_data, expected_output", [ @@ -14,109 +17,100 @@ (np.array([4, 5, 6]), np.array([4, 5, 6])), ], ) -def test_as_compatible_data( - input_data: T_DuckArray, expected_output: T_DuckArray -) -> None: - output = as_compatible_data(input_data) +def test_as_compatible_data(input_data: T_DuckArray, expected_output: T_DuckArray): + output: T_DuckArray = as_compatible_data(input_data) assert np.array_equal(output, expected_output) -class NamedArraySubclassObjects(ABC): - @abstractclassmethod - def cls(self, *args, **kwargs) -> NamedArray: - raise NotImplementedError - - def test_properties(self): - data = 0.5 * np.arange(10).reshape(2, 5) - named_array = self.cls(["x", "y"], data, {"key": "value"}) - assert named_array.dims == ("x", "y") - assert np.array_equal(named_array.data, data) - assert named_array.attrs == {"key": "value"} - assert named_array.ndim == 2 - assert named_array.sizes == {"x": 2, "y": 5} - assert named_array.size == 10 - assert named_array.nbytes == 80 - assert len(named_array) == 2 - - def test_attrs(self): - named_array = self.cls(["x", "y"], np.arange(10).reshape(2, 5)) - assert named_array.attrs == {} - named_array.attrs["key"] = "value" - assert named_array.attrs == {"key": "value"} - named_array.attrs = {"key": "value2"} - assert named_array.attrs == {"key": "value2"} - - -class TestNamedArray(NamedArraySubclassObjects): - def cls(self, *args, **kwargs) -> NamedArray: - return NamedArray(*args, **kwargs) - - @pytest.fixture(autouse=True) - def setup(self): - self.inputs = np.random.random((3, 4, 5)).astype(np.float32) - - def test_data(self): - named_array = NamedArray(["x", "y", "z"], self.inputs) - assert np.array_equal(named_array.data, self.inputs) +def test_properties(): + data = 0.5 * np.arange(10).reshape(2, 5) + named_array = NamedArray(["x", "y"], data, {"key": "value"}) + assert named_array.dims == ("x", "y") + assert np.array_equal(named_array.data, data) + assert named_array.attrs == {"key": "value"} + assert named_array.ndim == 2 + assert named_array.sizes == {"x": 2, "y": 5} + assert named_array.size == 10 + assert named_array.nbytes == 80 + assert len(named_array) == 2 + + +def test_attrs(): + named_array = NamedArray(["x", "y"], np.arange(10).reshape(2, 5)) + assert named_array.attrs == {} + named_array.attrs["key"] = "value" + assert named_array.attrs == {"key": "value"} + named_array.attrs = {"key": "value2"} + assert named_array.attrs == {"key": "value2"} + + +def test_data(random_inputs): + named_array = NamedArray(["x", "y", "z"], random_inputs) + assert np.array_equal(named_array.data, random_inputs) + with pytest.raises(ValueError): + named_array.data = np.random.random((3, 4)).astype(np.float64) + + +# Additional tests as per your original class-based code +@pytest.mark.parametrize( + "data, dtype", + [ + ("foo", np.dtype("U3")), + (np.bytes_("foo"), np.dtype("S3")), + ], +) +def test_0d_string(data, dtype): + named_array = NamedArray([], data) + assert named_array.data == data + assert named_array.dims == () + assert named_array.sizes == {} + assert named_array.attrs == {} + assert named_array.ndim == 0 + assert named_array.size == 1 + assert named_array.dtype == dtype + + +def test_0d_datetime(): + named_array = NamedArray([], np.datetime64("2000-01-01")) + assert named_array.dtype == np.dtype("datetime64[D]") + + +@pytest.mark.parametrize( + "timedelta, expected_dtype", + [ + (np.timedelta64(1, "D"), np.dtype("timedelta64[D]")), + (np.timedelta64(1, "s"), np.dtype("timedelta64[s]")), + (np.timedelta64(1, "m"), np.dtype("timedelta64[m]")), + (np.timedelta64(1, "h"), np.dtype("timedelta64[h]")), + (np.timedelta64(1, "us"), np.dtype("timedelta64[us]")), + (np.timedelta64(1, "ns"), np.dtype("timedelta64[ns]")), + (np.timedelta64(1, "ps"), np.dtype("timedelta64[ps]")), + (np.timedelta64(1, "fs"), np.dtype("timedelta64[fs]")), + (np.timedelta64(1, "as"), np.dtype("timedelta64[as]")), + ], +) +def test_0d_timedelta(timedelta, expected_dtype): + named_array = NamedArray([], timedelta) + assert named_array.dtype == expected_dtype + assert named_array.data == timedelta + + +@pytest.mark.parametrize( + "dims, data_shape, new_dims, raises", + [ + (["x", "y", "z"], (2, 3, 4), ["a", "b", "c"], False), + (["x", "y", "z"], (2, 3, 4), ["a", "b"], True), + (["x", "y", "z"], (2, 4, 5), ["a", "b", "c", "d"], True), + ([], [], (), False), + ([], [], ("x",), True), + ], +) +def test_dims_setter(dims, data_shape, new_dims, raises): + named_array = NamedArray(dims, np.random.random(data_shape)) + assert named_array.dims == tuple(dims) + if raises: with pytest.raises(ValueError): - named_array.data = np.random.random((3, 4)).astype(np.float64) - - @pytest.mark.parametrize( - "data, dtype", - [ - ("foo", np.dtype("U3")), - (np.bytes_("foo"), np.dtype("S3")), - ], - ) - def test_0d_string(self, data, dtype): - named_array = NamedArray([], data) - assert named_array.data == data - assert named_array.dims == () - assert named_array.sizes == {} - assert named_array.attrs == {} - assert named_array.ndim == 0 - assert named_array.size == 1 - assert named_array.dtype == dtype - - def test_0d_datetime(self): - named_array = NamedArray([], np.datetime64("2000-01-01")) - assert named_array.dtype == np.dtype("datetime64[D]") - - @pytest.mark.parametrize( - "timedelta, expected_dtype", - [ - (np.timedelta64(1, "D"), np.dtype("timedelta64[D]")), - (np.timedelta64(1, "s"), np.dtype("timedelta64[s]")), - (np.timedelta64(1, "m"), np.dtype("timedelta64[m]")), - (np.timedelta64(1, "h"), np.dtype("timedelta64[h]")), - (np.timedelta64(1, "us"), np.dtype("timedelta64[us]")), - (np.timedelta64(1, "ns"), np.dtype("timedelta64[ns]")), - (np.timedelta64(1, "ps"), np.dtype("timedelta64[ps]")), - (np.timedelta64(1, "fs"), np.dtype("timedelta64[fs]")), - (np.timedelta64(1, "as"), np.dtype("timedelta64[as]")), - ], - ) - def test_0d_timedelta(self, timedelta, expected_dtype): - named_array = NamedArray([], timedelta) - assert named_array.dtype == expected_dtype - assert named_array.data == timedelta - - @pytest.mark.parametrize( - "dims, data_shape, new_dims, raises", - [ - (["x", "y", "z"], (2, 3, 4), ["a", "b", "c"], False), - (["x", "y", "z"], (2, 3, 4), ["a", "b"], True), - (["x", "y", "z"], (2, 4, 5), ["a", "b", "c", "d"], True), - ([], [], (), False), - ([], [], ("x",), True), - ], - ) - def test_dims_setter(self, dims, data_shape, new_dims, raises): - named_array = NamedArray(dims, np.random.random(data_shape)) - assert named_array.dims == tuple(dims) - if raises: - with pytest.raises(ValueError): - named_array.dims = new_dims - else: named_array.dims = new_dims - assert named_array.dims == tuple(new_dims) + else: + named_array.dims = new_dims + assert named_array.dims == tuple(new_dims) From e530dd19f17c6ab0e39ea32dd0f52a6c82a6f094 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 19 Sep 2023 22:39:33 -0700 Subject: [PATCH 51/67] add fastpath --- xarray/namedarray/core.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 4d1731ee776..37cf2137d23 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -58,8 +58,9 @@ def __init__( dims: DimsInput, data: T_DuckArray | np.typing.ArrayLike, attrs: dict | None = None, + fastpath: bool = False, ): - self._data: T_DuckArray = as_compatible_data(data) + self._data: T_DuckArray = as_compatible_data(data, fastpath=fastpath) self._dims: Dims = self._parse_dimensions(dims) self._attrs: dict | None = dict(attrs) if attrs else None From 0f42857784b40d4eee7f636c85eec94ca607c606 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 20 Sep 2023 20:51:03 +0200 Subject: [PATCH 52/67] Test making sizes dict[Hashable, int] --- xarray/core/common.py | 4 ++-- xarray/core/groupby.py | 8 ++++---- xarray/namedarray/core.py | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/xarray/core/common.py b/xarray/core/common.py index ade701457c6..ea88ddf01e5 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -222,7 +222,7 @@ def _get_axis_num(self: Any, dim: Hashable) -> int: raise ValueError(f"{dim!r} not found in array dimensions {self.dims!r}") @property - def sizes(self: Any) -> Frozen[Hashable, int]: + def sizes(self: Any) -> dict[Hashable, int]: """Ordered mapping from dimension names to lengths. Immutable. @@ -231,7 +231,7 @@ def sizes(self: Any) -> Frozen[Hashable, int]: -------- Dataset.sizes """ - return Frozen(dict(zip(self.dims, self.shape))) + return dict(zip(self.dims, self.shape)) class AttrAccessMixin: diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 9894a4a4daf..edfb6c85905 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -37,6 +37,7 @@ from xarray.core.types import Dims, QuantileMethods, T_DataArray, T_Xarray from xarray.core.utils import ( either_dict_or_kwargs, + Frozen, hashable, is_scalar, maybe_wrap_array, @@ -51,7 +52,6 @@ from xarray.core.dataset import Dataset from xarray.core.resample_cftime import CFTimeGrouper from xarray.core.types import DatetimeLike, SideOptions - from xarray.core.utils import Frozen GroupKey = Any GroupIndex = Union[int, slice, list[int]] @@ -757,9 +757,9 @@ def sizes(self) -> Frozen[Hashable, int]: Dataset.sizes """ if self._sizes is None: - self._sizes = self._obj.isel( - {self._group_dim: self._group_indices[0]} - ).sizes + self._sizes = Frozen( + self._obj.isel({self._group_dim: self._group_indices[0]}).sizes + ) return self._sizes diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 37cf2137d23..5f68c14adb5 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -289,7 +289,7 @@ def chunksizes( return {} @property - def sizes(self: typing.Any) -> dict[Hashable, int]: + def sizes(self: T_NamedArray) -> dict[Hashable, int]: """Ordered mapping from dimension names to lengths. Immutable. From afc72288c668a0962b6a03ba5e9d3eb3de4c2dfd Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 20 Sep 2023 18:51:48 +0000 Subject: [PATCH 53/67] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index edfb6c85905..0bc3d01c13d 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -36,8 +36,8 @@ from xarray.core.pycompat import integer_types from xarray.core.types import Dims, QuantileMethods, T_DataArray, T_Xarray from xarray.core.utils import ( - either_dict_or_kwargs, Frozen, + either_dict_or_kwargs, hashable, is_scalar, maybe_wrap_array, From 32ec4ea262a55f9bdb9652964c2ebdfe6d97310d Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 20 Sep 2023 21:07:58 +0200 Subject: [PATCH 54/67] A lot of errors... Try Mapping instead --- xarray/core/common.py | 4 ++-- xarray/core/groupby.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/xarray/core/common.py b/xarray/core/common.py index ea88ddf01e5..5aa2c8f4e19 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -222,7 +222,7 @@ def _get_axis_num(self: Any, dim: Hashable) -> int: raise ValueError(f"{dim!r} not found in array dimensions {self.dims!r}") @property - def sizes(self: Any) -> dict[Hashable, int]: + def sizes(self: Any) -> Mapping[Hashable, int]: """Ordered mapping from dimension names to lengths. Immutable. @@ -231,7 +231,7 @@ def sizes(self: Any) -> dict[Hashable, int]: -------- Dataset.sizes """ - return dict(zip(self.dims, self.shape)) + return Frozen(dict(zip(self.dims, self.shape))) class AttrAccessMixin: diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 0bc3d01c13d..1b906a52685 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -36,7 +36,6 @@ from xarray.core.pycompat import integer_types from xarray.core.types import Dims, QuantileMethods, T_DataArray, T_Xarray from xarray.core.utils import ( - Frozen, either_dict_or_kwargs, hashable, is_scalar, @@ -52,6 +51,7 @@ from xarray.core.dataset import Dataset from xarray.core.resample_cftime import CFTimeGrouper from xarray.core.types import DatetimeLike, SideOptions + from xarray.core.utils import Frozen GroupKey = Any GroupIndex = Union[int, slice, list[int]] @@ -746,7 +746,7 @@ def __init__( self._sizes = None @property - def sizes(self) -> Frozen[Hashable, int]: + def sizes(self) -> Mapping[Hashable, int]: """Ordered mapping from dimension names to lengths. Immutable. @@ -757,9 +757,9 @@ def sizes(self) -> Frozen[Hashable, int]: Dataset.sizes """ if self._sizes is None: - self._sizes = Frozen( - self._obj.isel({self._group_dim: self._group_indices[0]}).sizes - ) + self._sizes = self._obj.isel( + {self._group_dim: self._group_indices[0]} + ).sizes return self._sizes From 76bb881ed3f2ed8029c1e1d16c4959a1b09f31b5 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 20 Sep 2023 21:12:18 +0200 Subject: [PATCH 55/67] Update groupby.py --- xarray/core/groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 1b906a52685..e9ddf044568 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -699,7 +699,7 @@ class GroupBy(Generic[T_Xarray]): _groups: dict[GroupKey, GroupIndex] | None _dims: tuple[Hashable, ...] | Frozen[Hashable, int] | None - _sizes: Frozen[Hashable, int] | None + _sizes: Mapping[Hashable, int] | None def __init__( self, From df777418db7ce4bfca49c022d3f4895566127df7 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Thu, 21 Sep 2023 21:16:16 +0200 Subject: [PATCH 56/67] Update types.py --- xarray/core/types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/types.py b/xarray/core/types.py index e9e700b038e..87c750953af 100644 --- a/xarray/core/types.py +++ b/xarray/core/types.py @@ -107,7 +107,7 @@ def dims(self) -> Frozen[Hashable, int] | tuple[Hashable, ...]: ... @property - def sizes(self) -> Frozen[Hashable, int]: + def sizes(self) -> Mapping[Hashable, int]: ... @property From 8bf13b58f1b22c6b039e9a7a154d68b4bf62230f Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com> Date: Mon, 25 Sep 2023 10:24:09 -0700 Subject: [PATCH 57/67] Apply suggestions from code review Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> Co-authored-by: Deepak Cherian --- xarray/tests/test_namedarray.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index 430dcf068a6..deacbab409f 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -6,8 +6,8 @@ @pytest.fixture -def random_inputs(): - return np.random.random((3, 4, 5)).astype(np.float32) +def random_inputs() -> np.ndarray: + return np.arange(3*4*5, dtype=np.float32).reshape((3, 4, 5)) @pytest.mark.parametrize( @@ -17,12 +17,12 @@ def random_inputs(): (np.array([4, 5, 6]), np.array([4, 5, 6])), ], ) -def test_as_compatible_data(input_data: T_DuckArray, expected_output: T_DuckArray): +def test_as_compatible_data(input_data: T_DuckArray, expected_output: T_DuckArray) -> None: output: T_DuckArray = as_compatible_data(input_data) assert np.array_equal(output, expected_output) -def test_properties(): +def test_properties() -> None: data = 0.5 * np.arange(10).reshape(2, 5) named_array = NamedArray(["x", "y"], data, {"key": "value"}) assert named_array.dims == ("x", "y") @@ -35,7 +35,7 @@ def test_properties(): assert len(named_array) == 2 -def test_attrs(): +def test_attrs() -> None: named_array = NamedArray(["x", "y"], np.arange(10).reshape(2, 5)) assert named_array.attrs == {} named_array.attrs["key"] = "value" @@ -44,7 +44,7 @@ def test_attrs(): assert named_array.attrs == {"key": "value2"} -def test_data(random_inputs): +def test_data(random_inputs) -> None: named_array = NamedArray(["x", "y", "z"], random_inputs) assert np.array_equal(named_array.data, random_inputs) with pytest.raises(ValueError): @@ -59,7 +59,7 @@ def test_data(random_inputs): (np.bytes_("foo"), np.dtype("S3")), ], ) -def test_0d_string(data, dtype): +def test_0d_string(data, dtype: np.typing.DTypeLike) -> None: named_array = NamedArray([], data) assert named_array.data == data assert named_array.dims == () @@ -70,7 +70,7 @@ def test_0d_string(data, dtype): assert named_array.dtype == dtype -def test_0d_datetime(): +def test_0d_datetime() -> None: named_array = NamedArray([], np.datetime64("2000-01-01")) assert named_array.dtype == np.dtype("datetime64[D]") @@ -89,7 +89,7 @@ def test_0d_datetime(): (np.timedelta64(1, "as"), np.dtype("timedelta64[as]")), ], ) -def test_0d_timedelta(timedelta, expected_dtype): +def test_0d_timedelta(timedelta, expected_dtype: np.dtype) -> None: named_array = NamedArray([], timedelta) assert named_array.dtype == expected_dtype assert named_array.data == timedelta @@ -105,7 +105,7 @@ def test_0d_timedelta(timedelta, expected_dtype): ([], [], ("x",), True), ], ) -def test_dims_setter(dims, data_shape, new_dims, raises): +def test_dims_setter(dims, data_shape, new_dims, raises: bool) -> None: named_array = NamedArray(dims, np.random.random(data_shape)) assert named_array.dims == tuple(dims) if raises: From 2f0192fa7d57851f88da4573a105adf394655e2d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 25 Sep 2023 17:24:49 +0000 Subject: [PATCH 58/67] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/tests/test_namedarray.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index deacbab409f..1e4d1ad7fa1 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -7,7 +7,7 @@ @pytest.fixture def random_inputs() -> np.ndarray: - return np.arange(3*4*5, dtype=np.float32).reshape((3, 4, 5)) + return np.arange(3 * 4 * 5, dtype=np.float32).reshape((3, 4, 5)) @pytest.mark.parametrize( @@ -17,7 +17,9 @@ def random_inputs() -> np.ndarray: (np.array([4, 5, 6]), np.array([4, 5, 6])), ], ) -def test_as_compatible_data(input_data: T_DuckArray, expected_output: T_DuckArray) -> None: +def test_as_compatible_data( + input_data: T_DuckArray, expected_output: T_DuckArray +) -> None: output: T_DuckArray = as_compatible_data(input_data) assert np.array_equal(output, expected_output) From 3f22902c07b9b4fd1ea080425498d5827a09060e Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Mon, 25 Sep 2023 10:52:33 -0700 Subject: [PATCH 59/67] update docstrings --- xarray/namedarray/core.py | 105 ++++++++++++++++++++++++-------------- 1 file changed, 68 insertions(+), 37 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 5f68c14adb5..4c3bc2f1978 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -51,6 +51,11 @@ def as_compatible_data( class NamedArray: + + """A lightweight wrapper around duck arrays with named dimensions and attributes which describe a single Array. + Numeric operations on this object implement array broadcasting and dimension alignment based on dimension names, + rather than axis order.""" + __slots__ = ("_dims", "_data", "_attrs") def __init__( @@ -60,6 +65,37 @@ def __init__( attrs: dict | None = None, fastpath: bool = False, ): + """ + Parameters + ---------- + dims : str or iterable of str + Name(s) of the dimension(s). + data : T_DuckArray or np.typing.ArrayLike + The actual data that populates the array. Should match the shape specified by `dims`. + attrs : dict, optional + A dictionary containing any additional information or attributes you want to store with the array. + Default is None, meaning no attributes will be stored. + fastpath : bool, optional + A flag to indicate if certain validations should be skipped for performance reasons. + Should only be True if you are certain about the integrity of the input data. + Default is False. + + Raises + ------ + ValueError + If the `dims` length does not match the number of data dimensions (ndim). + + Example + ------- + >>> na = NamedArray("time", np.array([1, 2, 3]), attrs={"units": "seconds"}) + >>> na.dims + ('time',) + >>> na.data + array([1, 2, 3]) + >>> na.attrs + {'units': 'seconds'} + + """ self._data: T_DuckArray = as_compatible_data(data, fastpath=fastpath) self._dims: Dims = self._parse_dimensions(dims) self._attrs: dict | None = dict(attrs) if attrs else None @@ -109,7 +145,14 @@ def dtype(self: T_NamedArray) -> np.dtype: @property def shape(self: T_NamedArray) -> tuple[int, ...]: """ - Tuple of array dimensions. + + + Returns + ------- + shape : tuple of ints + Tuple of array dimensions. + + See Also -------- @@ -132,7 +175,7 @@ def nbytes(self: T_NamedArray) -> int: @property def dims(self: T_NamedArray) -> Dims: - """Tuple of dimension names with which this variable is associated.""" + """Tuple of dimension names with which this NamedArray is associated.""" return self._dims @dims.setter @@ -150,7 +193,7 @@ def _parse_dimensions(self: T_NamedArray, dims: DimsInput) -> Dims: @property def attrs(self: T_NamedArray) -> dict[typing.Any, typing.Any]: - """Dictionary of local attributes on this variable.""" + """Dictionary of local attributes on this NamedArray.""" if self._attrs is None: self._attrs = {} return self._attrs @@ -162,21 +205,16 @@ def attrs(self: T_NamedArray, value: Mapping) -> None: def _check_shape(self, new_data: T_DuckArray) -> None: if new_data.shape != self.shape: raise ValueError( - f"replacement data must match the Variable's shape. " - f"replacement data has shape {new_data.shape}; Variable has shape {self.shape}" + f"replacement data must match the {self.__class__.__name__}'s shape. " + f"replacement data has shape {new_data.shape}; {self.__class__.__name__} has shape {self.shape}" ) @property def data(self: T_NamedArray): """ - The Variable's data as an array. The underlying array type + The NamedArray's data as an array. The underlying array type (e.g. dask, sparse, pint) is preserved. - See Also - -------- - Variable.to_numpy - Variable.as_numpy - Variable.values """ return self._data @@ -190,7 +228,7 @@ def data(self: T_NamedArray, data: T_DuckArray | np.typing.ArrayLike) -> None: @property def real(self: T_NamedArray) -> T_NamedArray: """ - The real part of the variable. + The real part of the NamedArray. See Also -------- @@ -201,7 +239,7 @@ def real(self: T_NamedArray) -> T_NamedArray: @property def imag(self: T_NamedArray) -> T_NamedArray: """ - The imaginary part of the variable. + The imaginary part of the NamedArray. See Also -------- @@ -254,13 +292,13 @@ def _dask_finalize( @property def chunks(self: T_NamedArray) -> tuple[tuple[int, ...], ...] | None: """ - Tuple of block lengths for this dataarray's data, in order of dimensions, or None if + Tuple of block lengths for this NamedArray's data, in order of dimensions, or None if the underlying data is not a dask array. See Also -------- - Variable.chunk - Variable.chunksizes + NamedArray.chunk + NamedArray.chunksizes xarray.unify_chunks """ return getattr(self._data, "chunks", None) @@ -270,17 +308,17 @@ def chunksizes( self: T_NamedArray, ) -> typing.Mapping[typing.Any, tuple[int, ...]]: """ - Mapping from dimension names to block lengths for this variable's data, or None if + Mapping from dimension names to block lengths for this namedArray's data, or None if the underlying data is not a dask array. Cannot be modified directly, but can be modified by calling .chunk(). - Differs from variable.chunks because it returns a mapping of dimensions to chunk shapes + Differs from NamedArray.chunks because it returns a mapping of dimensions to chunk shapes instead of a tuple of chunk shapes. See Also -------- - Variable.chunk - Variable.chunks + NamedArray.chunk + NamedArray.chunks xarray.unify_chunks """ if hasattr(self._data, "chunks"): @@ -290,14 +328,7 @@ def chunksizes( @property def sizes(self: T_NamedArray) -> dict[Hashable, int]: - """Ordered mapping from dimension names to lengths. - - Immutable. - - See Also - -------- - Dataset.sizes - """ + """Ordered mapping from dimension names to lengths.""" return dict(zip(self.dims, self.shape)) def _replace( @@ -363,25 +394,25 @@ def copy( Returns ------- - object : Variable - New object with dimensions, attributes, encodings, and optionally + object : NamedArray + New object with dimensions, attributes, and optionally data copied from original. Examples -------- Shallow copy versus deep copy - >>> var = xr.Variable(data=[1, 2, 3], dims="x") + >>> var = xr.NamedArray(data=[1, 2, 3], dims="x") >>> var.copy() - + array([1, 2, 3]) >>> var_0 = var.copy(deep=False) >>> var_0[0] = 7 >>> var_0 - + array([7, 2, 3]) >>> var - + array([7, 2, 3]) Changing the data using the ``data`` argument maintains the @@ -389,10 +420,10 @@ def copy( object is unaffected. >>> var.copy(data=[0.1, 0.2, 0.3]) - + array([0.1, 0.2, 0.3]) >>> var - + array([7, 2, 3]) See Also @@ -402,7 +433,7 @@ def copy( return self._copy(deep=deep, data=data) def _nonzero(self: T_NamedArray) -> tuple[T_NamedArray, ...]: - """Equivalent numpy's nonzero but returns a tuple of Variables.""" + """Equivalent numpy's nonzero but returns a tuple of NamedArrays.""" # TODO we should replace dask's native nonzero # after https://github.com/dask/dask/issues/1076 is implemented. nonzeros = np.nonzero(self.data) From f6186250e0d12528b6a13aee453c4ba67a51d241 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Mon, 25 Sep 2023 11:13:22 -0700 Subject: [PATCH 60/67] update error messages --- xarray/namedarray/core.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 4c3bc2f1978..3ec400145cc 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -36,9 +36,14 @@ def as_compatible_data( if is_duck_array(data): return data if isinstance(data, NamedArray): - raise ValueError + raise data.data if isinstance(data, np.ma.MaskedArray): - raise ValueError + mask = np.ma.getmaskarray(data) + if mask.any(): + # TODO: requires refactoring/vendoring xarray.core.dtypes and xarray.core.duck_array_ops + raise NotImplementedError("MaskedArray is not supported yet") + else: + data = np.asarray(data) if isinstance(data, ExplicitlyIndexed): # TODO: better that is_duck_array(ExplicitlyIndexed) -> True return typing.cast(T_DuckArray, data) @@ -150,7 +155,7 @@ def shape(self: T_NamedArray) -> tuple[int, ...]: Returns ------- shape : tuple of ints - Tuple of array dimensions. + Tuple of array dimensions. @@ -426,9 +431,6 @@ def copy( array([7, 2, 3]) - See Also - -------- - pandas.DataFrame.copy """ return self._copy(deep=deep, data=data) From 94bf6c4599d4ecf41cd9d158982b33544915f975 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Mon, 25 Sep 2023 11:47:11 -0700 Subject: [PATCH 61/67] update tests --- xarray/namedarray/core.py | 12 +++++++----- xarray/tests/test_namedarray.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 3ec400145cc..771ee3f6f84 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -33,17 +33,19 @@ def as_compatible_data( return typing.cast(T_DuckArray, data) # TODO : check scalar - if is_duck_array(data): - return data - if isinstance(data, NamedArray): - raise data.data + if isinstance(data, np.ma.MaskedArray): mask = np.ma.getmaskarray(data) if mask.any(): # TODO: requires refactoring/vendoring xarray.core.dtypes and xarray.core.duck_array_ops raise NotImplementedError("MaskedArray is not supported yet") else: - data = np.asarray(data) + return np.asarray(data) + if is_duck_array(data): + return data + if isinstance(data, NamedArray): + return data.data + if isinstance(data, ExplicitlyIndexed): # TODO: better that is_duck_array(ExplicitlyIndexed) -> True return typing.cast(T_DuckArray, data) diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index 1e4d1ad7fa1..ada33d76707 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -15,6 +15,7 @@ def random_inputs() -> np.ndarray: [ ([1, 2, 3], np.array([1, 2, 3])), (np.array([4, 5, 6]), np.array([4, 5, 6])), + (NamedArray("time", np.array([1, 2, 3])), np.array([1, 2, 3])), ], ) def test_as_compatible_data( @@ -24,6 +25,19 @@ def test_as_compatible_data( assert np.array_equal(output, expected_output) +def test_as_compatible_data_with_masked_array(): + masked_array = np.ma.array([1, 2, 3], mask=[False, True, False]) + with pytest.raises(NotImplementedError): + as_compatible_data(masked_array) + + +def test_as_compatible_data_with_0d_object(): + data = np.empty((), dtype=object) + data[()] = (10, 12, 12) + output = as_compatible_data(data) + assert np.array_equal(output, data) + + def test_properties() -> None: data = 0.5 * np.arange(10).reshape(2, 5) named_array = NamedArray(["x", "y"], data, {"key": "value"}) @@ -72,6 +86,20 @@ def test_0d_string(data, dtype: np.typing.DTypeLike) -> None: assert named_array.dtype == dtype +def test_0d_object() -> None: + named_array = NamedArray([], (10, 12, 12)) + expected_data = np.empty((), dtype=object) + expected_data[()] = (10, 12, 12) + assert np.array_equal(named_array.data, expected_data) + + assert named_array.dims == () + assert named_array.sizes == {} + assert named_array.attrs == {} + assert named_array.ndim == 0 + assert named_array.size == 1 + assert named_array.dtype == np.dtype("O") + + def test_0d_datetime() -> None: named_array = NamedArray([], np.datetime64("2000-01-01")) assert named_array.dtype == np.dtype("datetime64[D]") From 0ec787613bd24bc4af8adc176ce5e33162721c0f Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Mon, 25 Sep 2023 15:20:27 -0700 Subject: [PATCH 62/67] test explicitly index array --- xarray/namedarray/core.py | 18 +++++------------- xarray/tests/test_namedarray.py | 22 ++++++++++++++++++++-- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 771ee3f6f84..377c59a0ebd 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -40,11 +40,11 @@ def as_compatible_data( # TODO: requires refactoring/vendoring xarray.core.dtypes and xarray.core.duck_array_ops raise NotImplementedError("MaskedArray is not supported yet") else: - return np.asarray(data) + return typing.cast(T_DuckArray, np.asarray(data)) if is_duck_array(data): return data if isinstance(data, NamedArray): - return data.data + return typing.cast(T_DuckArray, data.data) if isinstance(data, ExplicitlyIndexed): # TODO: better that is_duck_array(ExplicitlyIndexed) -> True @@ -409,29 +409,21 @@ def copy( -------- Shallow copy versus deep copy - >>> var = xr.NamedArray(data=[1, 2, 3], dims="x") + >>> var = xr.namedarray.core.NamedArray(data=[1, 2, 3], dims="x") >>> var.copy() - - array([1, 2, 3]) >>> var_0 = var.copy(deep=False) >>> var_0[0] = 7 >>> var_0 - - array([7, 2, 3]) >>> var - - array([7, 2, 3]) + Changing the data using the ``data`` argument maintains the structure of the original object, but with the new data. Original object is unaffected. >>> var.copy(data=[0.1, 0.2, 0.3]) - - array([0.1, 0.2, 0.3]) >>> var - - array([7, 2, 3]) + """ return self._copy(deep=deep, data=data) diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index ada33d76707..e83958652a0 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -1,6 +1,7 @@ import numpy as np import pytest +import xarray as xr from xarray.namedarray.core import NamedArray, as_compatible_data from xarray.namedarray.utils import T_DuckArray @@ -25,19 +26,36 @@ def test_as_compatible_data( assert np.array_equal(output, expected_output) -def test_as_compatible_data_with_masked_array(): +def test_as_compatible_data_with_masked_array() -> None: masked_array = np.ma.array([1, 2, 3], mask=[False, True, False]) with pytest.raises(NotImplementedError): as_compatible_data(masked_array) -def test_as_compatible_data_with_0d_object(): +def test_as_compatible_data_with_0d_object() -> None: data = np.empty((), dtype=object) data[()] = (10, 12, 12) output = as_compatible_data(data) assert np.array_equal(output, data) +def test_as_compatible_data_with_explicitly_indexed(random_inputs) -> None: + class CustomArray(xr.core.indexing.NDArrayMixin): + def __init__(self, array): + self.array = array + + class CustomArrayIndexable(CustomArray, xr.core.indexing.ExplicitlyIndexed): + pass + + array = CustomArray(random_inputs) + output = as_compatible_data(array) + assert isinstance(output, np.ndarray) + + array = CustomArrayIndexable(random_inputs) + output = as_compatible_data(array) + assert isinstance(output, CustomArrayIndexable) + + def test_properties() -> None: data = 0.5 * np.arange(10).reshape(2, 5) named_array = NamedArray(["x", "y"], data, {"key": "value"}) From fb4ed126954464fd09fe7b2cae87c4ded6d675d7 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Mon, 25 Sep 2023 15:50:04 -0700 Subject: [PATCH 63/67] update tests --- xarray/namedarray/core.py | 30 ------------------------------ xarray/tests/test_namedarray.py | 4 ++-- 2 files changed, 2 insertions(+), 32 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 377c59a0ebd..7e22b5894a8 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -32,8 +32,6 @@ def as_compatible_data( # can't use fastpath (yet) for scalars return typing.cast(T_DuckArray, data) - # TODO : check scalar - if isinstance(data, np.ma.MaskedArray): mask = np.ma.getmaskarray(data) if mask.any(): @@ -92,15 +90,6 @@ def __init__( ValueError If the `dims` length does not match the number of data dimensions (ndim). - Example - ------- - >>> na = NamedArray("time", np.array([1, 2, 3]), attrs={"units": "seconds"}) - >>> na.dims - ('time',) - >>> na.data - array([1, 2, 3]) - >>> na.attrs - {'units': 'seconds'} """ self._data: T_DuckArray = as_compatible_data(data, fastpath=fastpath) @@ -405,25 +394,6 @@ def copy( New object with dimensions, attributes, and optionally data copied from original. - Examples - -------- - Shallow copy versus deep copy - - >>> var = xr.namedarray.core.NamedArray(data=[1, 2, 3], dims="x") - >>> var.copy() - >>> var_0 = var.copy(deep=False) - >>> var_0[0] = 7 - >>> var_0 - >>> var - - - Changing the data using the ``data`` argument maintains the - structure of the original object, but with the new data. Original - object is unaffected. - - >>> var.copy(data=[0.1, 0.2, 0.3]) - >>> var - """ return self._copy(deep=deep, data=data) diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index e83958652a0..e37e6d7acdc 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -17,6 +17,7 @@ def random_inputs() -> np.ndarray: ([1, 2, 3], np.array([1, 2, 3])), (np.array([4, 5, 6]), np.array([4, 5, 6])), (NamedArray("time", np.array([1, 2, 3])), np.array([1, 2, 3])), + (2, np.array(2)), ], ) def test_as_compatible_data( @@ -35,8 +36,7 @@ def test_as_compatible_data_with_masked_array() -> None: def test_as_compatible_data_with_0d_object() -> None: data = np.empty((), dtype=object) data[()] = (10, 12, 12) - output = as_compatible_data(data) - assert np.array_equal(output, data) + np.array_equal(as_compatible_data(data), data) def test_as_compatible_data_with_explicitly_indexed(random_inputs) -> None: From f0cfc11ce58d6df5ed3397a00c5610d9cf3aeafd Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Mon, 25 Sep 2023 15:53:50 -0700 Subject: [PATCH 64/67] remove unused types --- xarray/namedarray/utils.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py index 40d91a16aa3..1495e111d85 100644 --- a/xarray/namedarray/utils.py +++ b/xarray/namedarray/utils.py @@ -16,10 +16,6 @@ # hopefully in the future we can narrow this down more T_DuckArray = typing.TypeVar("T_DuckArray", bound=typing.Any) -K = typing.TypeVar("K") -V = typing.TypeVar("V") -T = typing.TypeVar("T") - def module_available(module: str) -> bool: """Checks whether a module is installed without importing it. From 48fcf9b77f2985ba05e49f826785b9a651d03577 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com> Date: Tue, 26 Sep 2023 09:31:10 -0700 Subject: [PATCH 65/67] Update xarray/tests/test_namedarray.py Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> --- xarray/tests/test_namedarray.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index e37e6d7acdc..d4b7afc0c31 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -40,6 +40,7 @@ def test_as_compatible_data_with_0d_object() -> None: def test_as_compatible_data_with_explicitly_indexed(random_inputs) -> None: +# TODO: Make xr.core.indexing.ExplicitlyIndexed pass is_duck_array and remove this test. class CustomArray(xr.core.indexing.NDArrayMixin): def __init__(self, array): self.array = array From 2d9d7ff24c664077ba55090748df9eb365506354 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 26 Sep 2023 16:32:26 +0000 Subject: [PATCH 66/67] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/tests/test_namedarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index d4b7afc0c31..0871a0c6fb9 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -40,7 +40,7 @@ def test_as_compatible_data_with_0d_object() -> None: def test_as_compatible_data_with_explicitly_indexed(random_inputs) -> None: -# TODO: Make xr.core.indexing.ExplicitlyIndexed pass is_duck_array and remove this test. + # TODO: Make xr.core.indexing.ExplicitlyIndexed pass is_duck_array and remove this test. class CustomArray(xr.core.indexing.NDArrayMixin): def __init__(self, array): self.array = array From 2ef5064f1adb0cd9f314fff7bfe9bd6c24caf5ae Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 26 Sep 2023 09:47:11 -0700 Subject: [PATCH 67/67] use Self --- xarray/namedarray/core.py | 93 +++++++++++++++++++++------------------ 1 file changed, 50 insertions(+), 43 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 7e22b5894a8..16a7b422f1b 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -2,6 +2,7 @@ import copy import math +import sys import typing from collections.abc import Hashable, Iterable, Mapping @@ -24,6 +25,18 @@ Dims = tuple[Hashable, ...] +try: + if sys.version_info >= (3, 11): + from typing import Self + else: + from typing_extensions import Self +except ImportError: + if typing.TYPE_CHECKING: + raise + else: + Self: typing.Any = None + + # TODO: Add tests! def as_compatible_data( data: T_DuckArray | np.typing.ArrayLike, fastpath: bool = False @@ -97,7 +110,7 @@ def __init__( self._attrs: dict | None = dict(attrs) if attrs else None @property - def ndim(self: T_NamedArray) -> int: + def ndim(self) -> int: """ Number of array dimensions. @@ -108,7 +121,7 @@ def ndim(self: T_NamedArray) -> int: return len(self.shape) @property - def size(self: T_NamedArray) -> int: + def size(self) -> int: """ Number of elements in the array. @@ -127,7 +140,7 @@ def __len__(self) -> int: raise TypeError("len() of unsized object") from exc @property - def dtype(self: T_NamedArray) -> np.dtype: + def dtype(self) -> np.dtype: """ Data-type of the array’s elements. @@ -139,7 +152,7 @@ def dtype(self: T_NamedArray) -> np.dtype: return self._data.dtype @property - def shape(self: T_NamedArray) -> tuple[int, ...]: + def shape(self) -> tuple[int, ...]: """ @@ -157,7 +170,7 @@ def shape(self: T_NamedArray) -> tuple[int, ...]: return self._data.shape @property - def nbytes(self: T_NamedArray) -> int: + def nbytes(self) -> int: """ Total bytes consumed by the elements of the data array. @@ -170,15 +183,15 @@ def nbytes(self: T_NamedArray) -> int: return self.size * self.dtype.itemsize @property - def dims(self: T_NamedArray) -> Dims: + def dims(self) -> Dims: """Tuple of dimension names with which this NamedArray is associated.""" return self._dims @dims.setter - def dims(self: T_NamedArray, value: DimsInput) -> None: + def dims(self, value: DimsInput) -> None: self._dims = self._parse_dimensions(value) - def _parse_dimensions(self: T_NamedArray, dims: DimsInput) -> Dims: + def _parse_dimensions(self, dims: DimsInput) -> Dims: dims = (dims,) if isinstance(dims, str) else tuple(dims) if len(dims) != self.ndim: raise ValueError( @@ -188,14 +201,14 @@ def _parse_dimensions(self: T_NamedArray, dims: DimsInput) -> Dims: return dims @property - def attrs(self: T_NamedArray) -> dict[typing.Any, typing.Any]: + def attrs(self) -> dict[typing.Any, typing.Any]: """Dictionary of local attributes on this NamedArray.""" if self._attrs is None: self._attrs = {} return self._attrs @attrs.setter - def attrs(self: T_NamedArray, value: Mapping) -> None: + def attrs(self, value: Mapping) -> None: self._attrs = dict(value) def _check_shape(self, new_data: T_DuckArray) -> None: @@ -206,7 +219,7 @@ def _check_shape(self, new_data: T_DuckArray) -> None: ) @property - def data(self: T_NamedArray): + def data(self): """ The NamedArray's data as an array. The underlying array type (e.g. dask, sparse, pint) is preserved. @@ -216,13 +229,13 @@ def data(self: T_NamedArray): return self._data @data.setter - def data(self: T_NamedArray, data: T_DuckArray | np.typing.ArrayLike) -> None: + def data(self, data: T_DuckArray | np.typing.ArrayLike) -> None: data = as_compatible_data(data) self._check_shape(data) self._data = data @property - def real(self: T_NamedArray) -> T_NamedArray: + def real(self) -> Self: """ The real part of the NamedArray. @@ -233,7 +246,7 @@ def real(self: T_NamedArray) -> T_NamedArray: return self._replace(data=self.data.real) @property - def imag(self: T_NamedArray) -> T_NamedArray: + def imag(self) -> Self: """ The imaginary part of the NamedArray. @@ -243,50 +256,48 @@ def imag(self: T_NamedArray) -> T_NamedArray: """ return self._replace(data=self.data.imag) - def __dask_tokenize__(self: T_NamedArray): + def __dask_tokenize__(self): # Use v.data, instead of v._data, in order to cope with the wrappers # around NetCDF and the like from dask.base import normalize_token return normalize_token((type(self), self._dims, self.data, self.attrs)) - def __dask_graph__(self: T_NamedArray): + def __dask_graph__(self): return self._data.__dask_graph__() if is_duck_dask_array(self._data) else None - def __dask_keys__(self: T_NamedArray): + def __dask_keys__(self): return self._data.__dask_keys__() - def __dask_layers__(self: T_NamedArray): + def __dask_layers__(self): return self._data.__dask_layers__() @property - def __dask_optimize__(self: T_NamedArray) -> typing.Callable: + def __dask_optimize__(self) -> typing.Callable: return self._data.__dask_optimize__ @property - def __dask_scheduler__(self: T_NamedArray) -> typing.Callable: + def __dask_scheduler__(self) -> typing.Callable: return self._data.__dask_scheduler__ def __dask_postcompute__( - self: T_NamedArray, + self, ) -> tuple[typing.Callable, tuple[typing.Any, ...]]: array_func, array_args = self._data.__dask_postcompute__() return self._dask_finalize, (array_func,) + array_args def __dask_postpersist__( - self: T_NamedArray, + self, ) -> tuple[typing.Callable, tuple[typing.Any, ...]]: array_func, array_args = self._data.__dask_postpersist__() return self._dask_finalize, (array_func,) + array_args - def _dask_finalize( - self: T_NamedArray, results, array_func, *args, **kwargs - ) -> T_NamedArray: + def _dask_finalize(self, results, array_func, *args, **kwargs) -> Self: data = array_func(results, *args, **kwargs) return type(self)(self._dims, data, attrs=self._attrs) @property - def chunks(self: T_NamedArray) -> tuple[tuple[int, ...], ...] | None: + def chunks(self) -> tuple[tuple[int, ...], ...] | None: """ Tuple of block lengths for this NamedArray's data, in order of dimensions, or None if the underlying data is not a dask array. @@ -301,7 +312,7 @@ def chunks(self: T_NamedArray) -> tuple[tuple[int, ...], ...] | None: @property def chunksizes( - self: T_NamedArray, + self, ) -> typing.Mapping[typing.Any, tuple[int, ...]]: """ Mapping from dimension names to block lengths for this namedArray's data, or None if @@ -323,13 +334,11 @@ def chunksizes( return {} @property - def sizes(self: T_NamedArray) -> dict[Hashable, int]: + def sizes(self) -> dict[Hashable, int]: """Ordered mapping from dimension names to lengths.""" return dict(zip(self.dims, self.shape)) - def _replace( - self: T_NamedArray, dims=_default, data=_default, attrs=_default - ) -> T_NamedArray: + def _replace(self, dims=_default, data=_default, attrs=_default) -> Self: if dims is _default: dims = copy.copy(self._dims) if data is _default: @@ -339,11 +348,11 @@ def _replace( return type(self)(dims, data, attrs) def _copy( - self: T_NamedArray, + self, deep: bool = True, data: T_DuckArray | np.typing.ArrayLike | None = None, memo: dict[int, typing.Any] | None = None, - ) -> T_NamedArray: + ) -> Self: if data is None: ndata = self._data if deep: @@ -358,19 +367,17 @@ def _copy( return self._replace(data=ndata, attrs=attrs) - def __copy__(self: T_NamedArray) -> T_NamedArray: + def __copy__(self) -> Self: return self._copy(deep=False) - def __deepcopy__( - self: T_NamedArray, memo: dict[int, typing.Any] | None = None - ) -> T_NamedArray: + def __deepcopy__(self, memo: dict[int, typing.Any] | None = None) -> Self: return self._copy(deep=True, memo=memo) def copy( - self: T_NamedArray, + self, deep: bool = True, data: T_DuckArray | np.typing.ArrayLike | None = None, - ) -> T_NamedArray: + ) -> Self: """Returns a copy of this object. If `deep=True`, the data array is loaded into memory and copied onto @@ -398,7 +405,7 @@ def copy( """ return self._copy(deep=deep, data=data) - def _nonzero(self: T_NamedArray) -> tuple[T_NamedArray, ...]: + def _nonzero(self) -> tuple[Self, ...]: """Equivalent numpy's nonzero but returns a tuple of NamedArrays.""" # TODO we should replace dask's native nonzero # after https://github.com/dask/dask/issues/1076 is implemented. @@ -406,10 +413,10 @@ def _nonzero(self: T_NamedArray) -> tuple[T_NamedArray, ...]: return tuple(type(self)((dim,), nz) for nz, dim in zip(nonzeros, self.dims)) def _as_sparse( - self: T_NamedArray, + self, sparse_format: str | Default = _default, fill_value=dtypes.NA, - ) -> T_NamedArray: + ) -> Self: """ use sparse-array as backend. """ @@ -431,7 +438,7 @@ def _as_sparse( data = as_sparse(self.data.astype(dtype), fill_value=fill_value) return self._replace(data=data) - def _to_dense(self: T_NamedArray) -> T_NamedArray: + def _to_dense(self) -> Self: """ Change backend from sparse to np.array """