Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

__slots__ #3250

Merged
merged 13 commits into from
Aug 29, 2019
18 changes: 18 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,24 @@ Breaking changes
- The ``inplace`` kwarg for public methods now raises an error, having been deprecated
since v0.11.0.
By `Maximilian Roos <https://github.com/max-sixty>`_
- Most xarray objects now define ``__slots__``. This reduces overall RAM usage by ~22%
(not counting the underlying numpy buffers); on CPython 3.7/x64, a trivial DataArray
has gone down from 1.9kB to 1.5kB.

Caveats:

- Pickle streams produced by older versions of xarray can't be loaded using this
release, and vice versa.
- Any user code that was accessing the ``__dict__`` attribute of
xarray objects will break. The best practice to attach custom metadata to xarray
objects is to use the ``attrs`` dictionary.
- Any user code that defines custom subclasses of xarray classes must now explicitly
define ``__slots__`` itself. Subclasses that don't add any attributes must state so
by defining ``__slots__ = ()`` right after the class header.
Omitting ``__slots__`` will now cause a ``FutureWarning`` to be logged, and a hard
crash in a later release.

(:issue:`3250`) by `Guido Imperiale <https://github.com/crusaderky>`_.

New functions/methods
~~~~~~~~~~~~~~~~~~~~~
Expand Down
2 changes: 2 additions & 0 deletions xarray/backends/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,8 @@ def open_dataarray(


class _MultiFileCloser:
__slots__ = ("file_objs",)

def __init__(self, file_objs):
self.file_objs = file_objs

Expand Down
10 changes: 10 additions & 0 deletions xarray/backends/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,16 @@ def robust_getitem(array, key, catch=Exception, max_retries=6, initial_delay=500


class BackendArray(NdimSizeLenMixin, indexing.ExplicitlyIndexed):
__slots__ = ()

def __array__(self, dtype=None):
key = indexing.BasicIndexer((slice(None),) * self.ndim)
return np.asarray(self[key], dtype=dtype)


class AbstractDataStore(Mapping):
__slots__ = ()

def __iter__(self):
return iter(self.variables)

Expand Down Expand Up @@ -165,6 +169,8 @@ def __exit__(self, exception_type, exception_value, traceback):


class ArrayWriter:
__slots__ = ("sources", "targets", "regions", "lock")

def __init__(self, lock=None):
self.sources = []
self.targets = []
Expand Down Expand Up @@ -205,6 +211,8 @@ def sync(self, compute=True):


class AbstractWritableDataStore(AbstractDataStore):
__slots__ = ()

def encode(self, variables, attributes):
"""
Encode the variables and attributes in this store
Expand Down Expand Up @@ -371,6 +379,8 @@ def set_dimensions(self, variables, unlimited_dims=None):


class WritableCFDataStore(AbstractWritableDataStore):
__slots__ = ()

def encode(self, variables, attributes):
# All NetCDF files get CF encoded by default, without this attempting
# to write times, for example, would fail.
Expand Down
18 changes: 18 additions & 0 deletions xarray/backends/netCDF4_.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@


class BaseNetCDF4Array(BackendArray):
__slots__ = ("datastore", "dtype", "shape", "variable_name")

def __init__(self, variable_name, datastore):
self.datastore = datastore
self.variable_name = variable_name
Expand All @@ -52,8 +54,13 @@ def __setitem__(self, key, value):
if self.datastore.autoclose:
self.datastore.close(needs_lock=False)

def get_array(self, needs_lock=True):
raise NotImplementedError("Virtual Method")


class NetCDF4ArrayWrapper(BaseNetCDF4Array):
__slots__ = ()

def get_array(self, needs_lock=True):
ds = self.datastore._acquire(needs_lock)
variable = ds.variables[self.variable_name]
Expand Down Expand Up @@ -294,6 +301,17 @@ class NetCDF4DataStore(WritableCFDataStore):
This store supports NetCDF3, NetCDF4 and OpenDAP datasets.
"""

__slots__ = (
"autoclose",
"format",
"is_remote",
"lock",
"_filename",
"_group",
"_manager",
"_mode",
)

def __init__(
self, manager, group=None, mode=None, lock=NETCDF4_PYTHON_LOCK, autoclose=False
):
Expand Down
11 changes: 11 additions & 0 deletions xarray/backends/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ def _encode_zarr_attr_value(value):


class ZarrArrayWrapper(BackendArray):
__slots__ = ("datastore", "dtype", "shape", "variable_name")

def __init__(self, variable_name, datastore):
self.datastore = datastore
self.variable_name = variable_name
Expand Down Expand Up @@ -231,6 +233,15 @@ class ZarrStore(AbstractWritableDataStore):
"""Store for reading and writing data via zarr
"""

__slots__ = (
"append_dim",
"ds",
"_consolidate_on_close",
"_group",
"_read_only",
"_synchronizer",
)

@classmethod
def open_group(
cls,
Expand Down
4 changes: 4 additions & 0 deletions xarray/conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ class NativeEndiannessArray(indexing.ExplicitlyIndexedNDArrayMixin):
dtype('int16')
"""

__slots__ = ("array",)

def __init__(self, array):
self.array = indexing.as_indexable(array)

Expand Down Expand Up @@ -60,6 +62,8 @@ class BoolTypeArray(indexing.ExplicitlyIndexedNDArrayMixin):
dtype('bool')
"""

__slots__ = ("array",)

def __init__(self, array):
self.array = indexing.as_indexable(array)

Expand Down
2 changes: 2 additions & 0 deletions xarray/core/accessor_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ class StringAccessor:

"""

__slots__ = ("_obj",)

def __init__(self, obj):
self._obj = obj

Expand Down
2 changes: 2 additions & 0 deletions xarray/core/arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ class SupportsArithmetic:
Used by Dataset, DataArray, Variable and GroupBy.
"""

__slots__ = ()

# TODO: implement special methods for arithmetic here rather than injecting
# them in xarray/core/ops.py. Ideally, do so by inheriting from
# numpy.lib.mixins.NDArrayOperatorsMixin.
Expand Down
89 changes: 74 additions & 15 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import warnings
from collections import OrderedDict
from contextlib import suppress
from textwrap import dedent
Expand Down Expand Up @@ -35,6 +36,8 @@


class ImplementsArrayReduce:
__slots__ = ()

@classmethod
def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool):
if include_skipna:
Expand Down Expand Up @@ -72,6 +75,8 @@ def wrapped_func(self, dim=None, axis=None, **kwargs): # type: ignore


class ImplementsDatasetReduce:
__slots__ = ()

@classmethod
def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool):
if include_skipna:
Expand Down Expand Up @@ -110,6 +115,8 @@ class AbstractArray(ImplementsArrayReduce):
"""Shared base class for DataArray and Variable.
"""

__slots__ = ()

def __bool__(self: Any) -> bool:
return bool(self.values)

Expand Down Expand Up @@ -180,7 +187,25 @@ class AttrAccessMixin:
"""Mixin class that allows getting keys with attribute access
"""

_initialized = False
__slots__ = ()

def __init_subclass__(cls):
"""Verify that all subclasses explicitly define ``__slots__``. If they don't,
raise error in the core xarray module and a FutureWarning in third-party
extensions.
This check is only triggered in Python 3.6+.
"""
if not hasattr(object.__new__(cls), "__dict__"):
cls.__setattr__ = cls._setattr_slots
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To clarify, in the future (Python 3.6+ only, post deprecation warning) will we be able to remove assignment to cls.__setattr__ from __init_subclass__? I'd rather avoid dynamically building methods if possible.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Post deprecation warning and after dropping support for py35, there will only be __setattr__, statically implemented with what is now the body of _setattr_slots. __init_subclass__ will retain only the health check.

elif cls.__module__.startswith("xarray."):
raise AttributeError("%s must explicitly define __slots__" % cls.__name__)
else:
cls.__setattr__ = cls._setattr_dict
warnings.warn(
"xarray subclass %s should explicitly define __slots__" % cls.__name__,
FutureWarning,
stacklevel=2,
)

@property
def _attr_sources(self) -> List[Mapping[Hashable, Any]]:
Expand All @@ -195,7 +220,7 @@ def _item_sources(self) -> List[Mapping[Hashable, Any]]:
return []

def __getattr__(self, name: str) -> Any:
if name != "__setstate__":
if name not in {"__dict__", "__setstate__"}:
# this avoids an infinite loop when pickle looks for the
# __setstate__ attribute before the xarray object is initialized
for source in self._attr_sources:
Expand All @@ -205,20 +230,52 @@ def __getattr__(self, name: str) -> Any:
"%r object has no attribute %r" % (type(self).__name__, name)
)

def __setattr__(self, name: str, value: Any) -> None:
if self._initialized:
try:
# Allow setting instance variables if they already exist
# (e.g., _attrs). We use __getattribute__ instead of hasattr
# to avoid key lookups with attribute-style access.
self.__getattribute__(name)
except AttributeError:
raise AttributeError(
"cannot set attribute %r on a %r object. Use __setitem__ "
"style assignment (e.g., `ds['name'] = ...`) instead to "
"assign variables." % (name, type(self).__name__)
)
# This complicated three-method design boosts overall performance of simple
# operations - particularly DataArray methods that perform a _to_temp_dataset()
# round-trip - by a whopping 8% compared to a single method that checks
# hasattr(self, "__dict__") at runtime before every single assignment (like
# _setattr_py35 does). All of this is just temporary until the FutureWarning can be
# changed into a hard crash.
def _setattr_dict(self, name: str, value: Any) -> None:
"""Deprecated third party subclass (see ``__init_subclass__`` above)
"""
object.__setattr__(self, name, value)
if name in self.__dict__:
# Custom, non-slotted attr, or improperly assigned variable?
warnings.warn(
"Setting attribute %r on a %r object. Explicitly define __slots__ "
"to suppress this warning for legitimate custom attributes and "
"raise an error when attempting variables assignments."
% (name, type(self).__name__),
FutureWarning,
stacklevel=2,
)

def _setattr_slots(self, name: str, value: Any) -> None:
"""Objects with ``__slots__`` raise AttributeError if you try setting an
undeclared attribute. This is desirable, but the error message could use some
improvement.
"""
try:
object.__setattr__(self, name, value)
except AttributeError as e:
# Don't accidentally shadow custom AttributeErrors, e.g.
# DataArray.dims.setter
if str(e) != "%r object has no attribute %r" % (type(self).__name__, name):
raise
raise AttributeError(
"cannot set attribute %r on a %r object. Use __setitem__ style"
"assignment (e.g., `ds['name'] = ...`) instead of assigning variables."
% (name, type(self).__name__)
) from e

def _setattr_py35(self, name: str, value: Any) -> None:
if hasattr(self, "__dict__"):
return self._setattr_dict(name, value)
return self._setattr_slots(name, value)

# Overridden in Python >=3.6 by __init_subclass__
__setattr__ = _setattr_py35

def __dir__(self) -> List[str]:
"""Provide method name lookup and completion. Only provide 'public'
Expand Down Expand Up @@ -283,6 +340,8 @@ def get_squeeze_dims(
class DataWithCoords(SupportsArithmetic, AttrAccessMixin):
"""Shared base class for Dataset and DataArray."""

__slots__ = ()

_rolling_exp_cls = RollingExp

def squeeze(
Expand Down
8 changes: 8 additions & 0 deletions xarray/core/computation.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,14 @@ class _UFuncSignature:
Core dimension names on each output variable.
"""

__slots__ = (
"input_core_dims",
"output_core_dims",
"_all_input_core_dims",
"_all_output_core_dims",
"_all_core_dims",
)

def __init__(self, input_core_dims, output_core_dims=((),)):
self.input_core_dims = tuple(tuple(a) for a in input_core_dims)
self.output_core_dims = tuple(tuple(a) for a in output_core_dims)
Expand Down
14 changes: 8 additions & 6 deletions xarray/core/coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@


class AbstractCoordinates(Mapping[Hashable, "DataArray"]):
_data = None # type: Union["DataArray", "Dataset"]
__slots__ = ()

def __getitem__(self, key: Hashable) -> "DataArray":
raise NotImplementedError()
Expand All @@ -53,7 +53,7 @@ def dims(self) -> Union[Mapping[Hashable, int], Tuple[Hashable, ...]]:

@property
def indexes(self) -> Indexes:
return self._data.indexes
return self._data.indexes # type: ignore

@property
def variables(self):
Expand Down Expand Up @@ -108,9 +108,9 @@ def to_index(self, ordered_dims: Sequence[Hashable] = None) -> pd.Index:
raise ValueError("no valid index for a 0-dimensional object")
elif len(ordered_dims) == 1:
(dim,) = ordered_dims
return self._data.get_index(dim)
return self._data.get_index(dim) # type: ignore
else:
indexes = [self._data.get_index(k) for k in ordered_dims]
indexes = [self._data.get_index(k) for k in ordered_dims] # type: ignore
names = list(ordered_dims)
return pd.MultiIndex.from_product(indexes, names=names)

Expand Down Expand Up @@ -187,7 +187,7 @@ class DatasetCoordinates(AbstractCoordinates):
objects.
"""

_data = None # type: Dataset
__slots__ = ("_data",)

def __init__(self, dataset: "Dataset"):
self._data = dataset
Expand Down Expand Up @@ -258,7 +258,7 @@ class DataArrayCoordinates(AbstractCoordinates):
dimensions and the values given by corresponding DataArray objects.
"""

_data = None # type: DataArray
__slots__ = ("_data",)

def __init__(self, dataarray: "DataArray"):
self._data = dataarray
Expand Down Expand Up @@ -314,6 +314,8 @@ class LevelCoordinatesSource(Mapping[Hashable, Any]):
by any public methods.
"""

__slots__ = ("_data",)

def __init__(self, data_object: "Union[DataArray, Dataset]"):
self._data = data_object

Expand Down
Loading