From a9a10af1194b976abb9bf126aa02fda8d40277b4 Mon Sep 17 00:00:00 2001 From: Isaac Virshup Date: Mon, 4 Sep 2023 15:39:23 +0200 Subject: [PATCH 01/33] Make creation of aligned mapping lazy --- anndata/_core/aligned_mapping.py | 61 +++++++++--- anndata/_core/anndata.py | 160 +++---------------------------- anndata/_core/raw.py | 20 ++-- anndata/_io/write.py | 4 +- 4 files changed, 74 insertions(+), 171 deletions(-) diff --git a/anndata/_core/aligned_mapping.py b/anndata/_core/aligned_mapping.py index 156146518..dfdcc6c0d 100644 --- a/anndata/_core/aligned_mapping.py +++ b/anndata/_core/aligned_mapping.py @@ -10,7 +10,7 @@ import pandas as pd from scipy.sparse import spmatrix -from ..utils import deprecated, ensure_df_homogeneous, dim_len +from ..utils import deprecated, ensure_df_homogeneous, dim_len, convert_to_dict from . import raw, anndata from .views import as_view, view_update from .access import ElementRef @@ -108,7 +108,7 @@ def parent(self) -> Union["anndata.AnnData", "raw.Raw"]: return self._parent def copy(self): - d = self._actual_class(self.parent, self._axis) + d = self._actual_class(self.parent, self._axis, {}) for k, v in self.items(): if isinstance(v, AwkArray): # Shallow copy since awkward array buffers are immutable @@ -262,6 +262,7 @@ def dim_names(self) -> pd.Index: return (self.parent.obs_names, self.parent.var_names)[self._axis] +# TODO: vals can't be None class AxisArrays(AlignedActualMixin, AxisArraysBase): def __init__( self, @@ -273,9 +274,9 @@ def __init__( if axis not in (0, 1): raise ValueError() self._axis = axis - self._data = dict() - if vals is not None: - self.update(vals) + for k, v in vals.items(): + vals[k] = self._validate_value(v, k) + self._data = vals class AxisArraysView(AlignedViewMixin, AxisArraysBase): @@ -307,18 +308,21 @@ class LayersBase(AlignedMapping): # TODO: I thought I had a more elegant solution to overriding this... def copy(self) -> "Layers": - d = self._actual_class(self.parent) + d = self._actual_class(self.parent, vals={}) for k, v in self.items(): d[k] = v.copy() return d class Layers(AlignedActualMixin, LayersBase): - def __init__(self, parent: "anndata.AnnData", vals: Optional[Mapping] = None): + def __init__( + self, parent: "anndata.AnnData", axis=(0, 1), vals: Optional[Mapping] = None + ): + assert axis == (0, 1), axis self._parent = parent - self._data = dict() - if vals is not None: - self.update(vals) + for k, v in vals.items(): + vals[k] = self._validate_value(v, k) + self._data = vals class LayersView(AlignedViewMixin, LayersBase): @@ -372,9 +376,9 @@ def __init__( if axis not in (0, 1): raise ValueError() self._axis = axis - self._data = dict() - if vals is not None: - self.update(vals) + for k, v in vals.items(): + vals[k] = self._validate_value(v, k) + self._data = vals class PairwiseArraysView(AlignedViewMixin, PairwiseArraysBase): @@ -386,9 +390,38 @@ def __init__( ): self.parent_mapping = parent_mapping self._parent = parent_view - self.subset_idx = (subset_idx, subset_idx) + self.subset_idx = subset_idx self._axis = parent_mapping._axis PairwiseArraysBase._view_class = PairwiseArraysView PairwiseArraysBase._actual_class = PairwiseArrays + + +class AlignedMappingProperty: + def __init__(self, name, cls, axis): + self.name = name + self.axis = axis + self.cls = cls + + def __get__(self, obj, objtype=None): + if obj.is_view: + parent_anndata = obj._adata_ref + idxs = (obj._oidx, obj._vidx) + parent_aligned_mapping = getattr(parent_anndata, self.name) + return parent_aligned_mapping._view( + obj, tuple(idxs[ax] for ax in parent_aligned_mapping.axes) + ) + # return self.cls._view_class() + else: + return self.cls(obj, self.axis, getattr(obj, "_" + self.name)) + + def __set__(self, obj, value): + value = convert_to_dict(value) + _ = self.cls(obj, self.axis, value) # Validate + if obj.is_view: + obj._init_as_actual(obj.copy()) + setattr(obj, "_" + self.name, value) + + def __delete__(self, obj): + setattr(obj, self.name, dict()) diff --git a/anndata/_core/anndata.py b/anndata/_core/anndata.py index b19381b0b..d40ec6398 100644 --- a/anndata/_core/anndata.py +++ b/anndata/_core/anndata.py @@ -32,11 +32,9 @@ from .access import ElementRef from .aligned_mapping import ( AxisArrays, - AxisArraysView, PairwiseArrays, - PairwiseArraysView, Layers, - LayersView, + AlignedMappingProperty, ) from .views import ( ArrayView, @@ -47,7 +45,7 @@ ) from .sparse_dataset import SparseDataset from .. import utils -from ..utils import convert_to_dict, ensure_df_homogeneous, dim_len +from ..utils import ensure_df_homogeneous, dim_len from ..logging import anndata_logger as logger from ..compat import ( ZarrArray, @@ -343,11 +341,6 @@ def _init_as_view(self, adata_ref: "AnnData", oidx: Index, vidx: Index): # views on attributes of adata_ref obs_sub = adata_ref.obs.iloc[oidx] var_sub = adata_ref.var.iloc[vidx] - self._obsm = adata_ref.obsm._view(self, (oidx,)) - self._varm = adata_ref.varm._view(self, (vidx,)) - self._layers = adata_ref.layers._view(self, (oidx, vidx)) - self._obsp = adata_ref.obsp._view(self, oidx) - self._varp = adata_ref.varp._view(self, vidx) # fix categories uns = copy(adata_ref._uns) self._remove_unused_categories(adata_ref.obs, obs_sub, uns) @@ -506,12 +499,11 @@ def _init_as_actual( # unstructured annotations self.uns = uns or OrderedDict() - # TODO: Think about consequences of making obsm a group in hdf - self._obsm = AxisArrays(self, 0, vals=convert_to_dict(obsm)) - self._varm = AxisArrays(self, 1, vals=convert_to_dict(varm)) + self.obsm = obsm + self.varm = varm - self._obsp = PairwiseArrays(self, 0, vals=convert_to_dict(obsp)) - self._varp = PairwiseArrays(self, 1, vals=convert_to_dict(varp)) + self.obsp = obsp + self.varp = varp # Backwards compat for connectivities matrices in uns["neighbors"] _move_adj_mtx({"uns": self._uns, "obsp": self._obsp}) @@ -536,7 +528,7 @@ def _init_as_actual( self._clean_up_old_format(uns) # layers - self._layers = Layers(self, layers) + self.layers = layers def __sizeof__(self, show_stratified=None) -> int: def get_size(X): @@ -696,45 +688,11 @@ def X(self, value: Optional[Union[np.ndarray, sparse.spmatrix]]): def X(self): self.X = None - @property - def layers(self) -> Union[Layers, LayersView]: - """\ - Dictionary-like object with values of the same dimensions as :attr:`X`. - - Layers in AnnData are inspired by loompy’s :ref:`loomlayers`. - - Return the layer named `"unspliced"`:: - - adata.layers["unspliced"] - - Create or replace the `"spliced"` layer:: - - adata.layers["spliced"] = ... - - Assign the 10th column of layer `"spliced"` to the variable a:: - - a = adata.layers["spliced"][:, 10] - - Delete the `"spliced"` layer:: - - del adata.layers["spliced"] - - Return layers’ names:: - - adata.layers.keys() - """ - return self._layers - - @layers.setter - def layers(self, value): - layers = Layers(self, vals=convert_to_dict(value)) - if self.is_view: - self._init_as_actual(self.copy()) - self._layers = layers - - @layers.deleter - def layers(self): - self.layers = dict() + obsm = AlignedMappingProperty("obsm", AxisArrays, 0) + varm = AlignedMappingProperty("varm", AxisArrays, 1) + layers = AlignedMappingProperty("layers", Layers, (0, 1)) + obsp = AlignedMappingProperty("obsp", PairwiseArrays, 0) + varp = AlignedMappingProperty("varp", PairwiseArrays, 1) @property def raw(self) -> Raw: @@ -845,7 +803,7 @@ def _set_dim_index(self, value: pd.Index, attr: str): if self.is_view: self._init_as_actual(self.copy()) getattr(self, attr).index = value - for v in getattr(self, f"{attr}m").values(): + for v in getattr(self, f"_{attr}m").values(): if isinstance(v, pd.DataFrame): v.index = value @@ -919,98 +877,6 @@ def uns(self, value: MutableMapping): def uns(self): self.uns = OrderedDict() - @property - def obsm(self) -> Union[AxisArrays, AxisArraysView]: - """\ - Multi-dimensional annotation of observations - (mutable structured :class:`~numpy.ndarray`). - - Stores for each key a two or higher-dimensional :class:`~numpy.ndarray` - of length `n_obs`. - Is sliced with `data` and `obs` but behaves otherwise like a :term:`mapping`. - """ - return self._obsm - - @obsm.setter - def obsm(self, value): - obsm = AxisArrays(self, 0, vals=convert_to_dict(value)) - if self.is_view: - self._init_as_actual(self.copy()) - self._obsm = obsm - - @obsm.deleter - def obsm(self): - self.obsm = dict() - - @property - def varm(self) -> Union[AxisArrays, AxisArraysView]: - """\ - Multi-dimensional annotation of variables/features - (mutable structured :class:`~numpy.ndarray`). - - Stores for each key a two or higher-dimensional :class:`~numpy.ndarray` - of length `n_vars`. - Is sliced with `data` and `var` but behaves otherwise like a :term:`mapping`. - """ - return self._varm - - @varm.setter - def varm(self, value): - varm = AxisArrays(self, 1, vals=convert_to_dict(value)) - if self.is_view: - self._init_as_actual(self.copy()) - self._varm = varm - - @varm.deleter - def varm(self): - self.varm = dict() - - @property - def obsp(self) -> Union[PairwiseArrays, PairwiseArraysView]: - """\ - Pairwise annotation of observations, - a mutable mapping with array-like values. - - Stores for each key a two or higher-dimensional :class:`~numpy.ndarray` - whose first two dimensions are of length `n_obs`. - Is sliced with `data` and `obs` but behaves otherwise like a :term:`mapping`. - """ - return self._obsp - - @obsp.setter - def obsp(self, value): - obsp = PairwiseArrays(self, 0, vals=convert_to_dict(value)) - if self.is_view: - self._init_as_actual(self.copy()) - self._obsp = obsp - - @obsp.deleter - def obsp(self): - self.obsp = dict() - - @property - def varp(self) -> Union[PairwiseArrays, PairwiseArraysView]: - """\ - Pairwise annotation of variables/features, - a mutable mapping with array-like values. - - Stores for each key a two or higher-dimensional :class:`~numpy.ndarray` - whose first two dimensions are of length `n_var`. - Is sliced with `data` and `var` but behaves otherwise like a :term:`mapping`. - """ - return self._varp - - @varp.setter - def varp(self, value): - varp = PairwiseArrays(self, 1, vals=convert_to_dict(value)) - if self.is_view: - self._init_as_actual(self.copy()) - self._varp = varp - - @varp.deleter - def varp(self): - self.varp = dict() - def obs_keys(self) -> List[str]: """List keys of observation annotation :attr:`obs`.""" return self._obs.keys().tolist() diff --git a/anndata/_core/raw.py b/anndata/_core/raw.py index f248b99be..c9df14529 100644 --- a/anndata/_core/raw.py +++ b/anndata/_core/raw.py @@ -8,7 +8,7 @@ from . import anndata from .index import _normalize_index, _subset, unpack_index, get_vector -from .aligned_mapping import AxisArrays +from .aligned_mapping import AxisArrays, AlignedMappingProperty from .sparse_dataset import SparseDataset from ..compat import CupyArray, CupySparseMatrix @@ -35,7 +35,8 @@ def __init__( else: self._X = X self._var = _gen_dataframe(var, self.X.shape[1], ["var_names"]) - self._varm = AxisArrays(self, 1, varm) + self.varm = varm + # self._varm = AxisArrays(self, 1, varm) elif X is None: # construct from adata # Move from GPU to CPU since it's large and not always used if isinstance(adata.X, (CupyArray, CupySparseMatrix)): @@ -43,7 +44,8 @@ def __init__( else: self._X = adata.X.copy() self._var = adata.var.copy() - self._varm = AxisArrays(self, 1, adata.varm.copy()) + self.varm = adata.varm.copy() + # self._varm = AxisArrays(self, 1, adata.varm.copy()) elif adata.isbacked: raise ValueError("Cannot specify X if adata is backed") @@ -95,9 +97,7 @@ def n_vars(self): def n_obs(self): return self._n_obs - @property - def varm(self): - return self._varm + varm = AlignedMappingProperty("varm", AxisArrays, 1) @property def var_names(self): @@ -123,11 +123,15 @@ def __getitem__(self, index): var = self._var.iloc[vidx] new = Raw(self._adata, X=X, var=var) - if self._varm is not None: + if self.varm is not None: # Since there is no view of raws - new._varm = self._varm._view(_RawViewHack(self, vidx), (vidx,)).copy() + new.varm = self.varm._view(_RawViewHack(self, vidx), (vidx,)).copy() return new + @property + def is_view(self): + return False + def __str__(self): descr = f"Raw AnnData with n_obs × n_vars = {self.n_obs} × {self.n_vars}" for attr in ["var", "varm"]: diff --git a/anndata/_io/write.py b/anndata/_io/write.py index 28ce012d0..1ffd95553 100644 --- a/anndata/_io/write.py +++ b/anndata/_io/write.py @@ -30,8 +30,8 @@ def write_csvs( d = dict( obs=adata._obs, var=adata._var, - obsm=adata._obsm.to_df(), - varm=adata._varm.to_df(), + obsm=adata.obsm.to_df(), + varm=adata.varm.to_df(), ) if not skip_data: d["X"] = pd.DataFrame(adata.X.toarray() if issparse(adata.X) else adata.X) From ca0759a1cc14616a0141373f9d3d2a32d43d37fd Mon Sep 17 00:00:00 2001 From: Isaac Virshup Date: Mon, 4 Sep 2023 16:17:15 +0200 Subject: [PATCH 02/33] Use weakref for filemanager --- anndata/_core/file_backing.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/anndata/_core/file_backing.py b/anndata/_core/file_backing.py index 02401873c..03e98bfea 100644 --- a/anndata/_core/file_backing.py +++ b/anndata/_core/file_backing.py @@ -3,6 +3,7 @@ from pathlib import Path from typing import Optional, Union, Iterator, Literal from collections.abc import Mapping +import weakref import h5py @@ -20,13 +21,26 @@ def __init__( filename: Optional[PathLike] = None, filemode: Optional[Literal["r", "r+"]] = None, ): - self._adata = adata + self._adata_ref = weakref.ref(adata) self.filename = filename self._filemode = filemode self._file = None if filename: self.open() + def __getstate__(self): + state = self.__dict__.copy() + state["_adata_ref"] = state["_adata_ref"]() + return state + + def __setstate__(self, state): + self.__dict__ = state.copy() + self.__dict__["_adata_ref"] = weakref.ref(state["_adata_ref"]) + + @property + def _adata(self): + return self._adata_ref() + def __repr__(self) -> str: if self.filename is None: return "Backing file manager: no file is set." From cb53b779231e79d1409e9768878e5896c520d6d4 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Mon, 3 Jun 2024 16:24:31 +0200 Subject: [PATCH 03/33] (chore): add benchmark --- benchmarks/benchmarks/anndata.py | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 benchmarks/benchmarks/anndata.py diff --git a/benchmarks/benchmarks/anndata.py b/benchmarks/benchmarks/anndata.py new file mode 100644 index 000000000..89ed6d655 --- /dev/null +++ b/benchmarks/benchmarks/anndata.py @@ -0,0 +1,9 @@ +from __future__ import annotations + +from anndata.tests.helpers import gen_adata + + +class GarbargeCollectionSuite: + def track_peakmem_write_compressed(self, *_): + for _ in range(50): + adata = gen_adata((10000, 10000)) # noqa: F841 From 2df03b51685a1f51fb97710eb768a0cc04ac75f9 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Mon, 3 Jun 2024 16:37:28 +0200 Subject: [PATCH 04/33] (fix): use right `gen_adata` function --- benchmarks/benchmarks/anndata.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/benchmarks/anndata.py b/benchmarks/benchmarks/anndata.py index 89ed6d655..38d6e62fe 100644 --- a/benchmarks/benchmarks/anndata.py +++ b/benchmarks/benchmarks/anndata.py @@ -1,9 +1,9 @@ from __future__ import annotations -from anndata.tests.helpers import gen_adata +from .utils import gen_adata class GarbargeCollectionSuite: def track_peakmem_write_compressed(self, *_): for _ in range(50): - adata = gen_adata((10000, 10000)) # noqa: F841 + adata = gen_adata(10000, 10000, "X-csc") # noqa: F841 From 38d218fc74caa3d0b892324454331b6034bc2546 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Mon, 3 Jun 2024 16:41:13 +0200 Subject: [PATCH 05/33] (fix): change name --- benchmarks/benchmarks/anndata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/benchmarks/anndata.py b/benchmarks/benchmarks/anndata.py index 38d6e62fe..de52b68db 100644 --- a/benchmarks/benchmarks/anndata.py +++ b/benchmarks/benchmarks/anndata.py @@ -4,6 +4,6 @@ class GarbargeCollectionSuite: - def track_peakmem_write_compressed(self, *_): + def track_peakmem_garbage_collection(self, *_): for _ in range(50): adata = gen_adata(10000, 10000, "X-csc") # noqa: F841 From a9eba7aa1943b125ad0a3a4669a60315832469b4 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Mon, 3 Jun 2024 16:42:27 +0200 Subject: [PATCH 06/33] (chore): fewer runs --- benchmarks/benchmarks/anndata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/benchmarks/anndata.py b/benchmarks/benchmarks/anndata.py index de52b68db..26d27924a 100644 --- a/benchmarks/benchmarks/anndata.py +++ b/benchmarks/benchmarks/anndata.py @@ -5,5 +5,5 @@ class GarbargeCollectionSuite: def track_peakmem_garbage_collection(self, *_): - for _ in range(50): + for _ in range(10): adata = gen_adata(10000, 10000, "X-csc") # noqa: F841 From be8ae5558279e27a26b953ac621fd859c6d040fd Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 4 Jun 2024 11:17:54 +0200 Subject: [PATCH 07/33] (fix): benchmark test name --- benchmarks/benchmarks/anndata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/benchmarks/anndata.py b/benchmarks/benchmarks/anndata.py index 26d27924a..5311736d4 100644 --- a/benchmarks/benchmarks/anndata.py +++ b/benchmarks/benchmarks/anndata.py @@ -4,6 +4,6 @@ class GarbargeCollectionSuite: - def track_peakmem_garbage_collection(self, *_): + def peakmem_garbage_collection(self, *_): for _ in range(10): adata = gen_adata(10000, 10000, "X-csc") # noqa: F841 From e0bff0e15398ed99d325813d186a1e16bfe70f61 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 4 Jun 2024 11:48:57 +0200 Subject: [PATCH 08/33] (fix): return `cls` for `None` obj --- src/anndata/_core/aligned_mapping.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/anndata/_core/aligned_mapping.py b/src/anndata/_core/aligned_mapping.py index fa8567be7..0869cd900 100644 --- a/src/anndata/_core/aligned_mapping.py +++ b/src/anndata/_core/aligned_mapping.py @@ -421,6 +421,8 @@ def __init__(self, name, cls, axis): self.cls = cls def __get__(self, obj, objtype=None): + if obj is None: # None check needed for AnnData.layers accessors + return self.cls if obj.is_view: parent_anndata = obj._adata_ref idxs = (obj._oidx, obj._vidx) @@ -428,7 +430,6 @@ def __get__(self, obj, objtype=None): return parent_aligned_mapping._view( obj, tuple(idxs[ax] for ax in parent_aligned_mapping.axes) ) - # return self.cls._view_class() else: return self.cls(obj, self.axis, getattr(obj, "_" + self.name)) From 6ed99634ceabd810203bde22606adcad2db1e230 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 4 Jun 2024 14:08:35 +0200 Subject: [PATCH 09/33] (fix): try `track_peakmem` --- benchmarks/benchmarks/anndata.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/benchmarks/benchmarks/anndata.py b/benchmarks/benchmarks/anndata.py index 5311736d4..7f33e9bac 100644 --- a/benchmarks/benchmarks/anndata.py +++ b/benchmarks/benchmarks/anndata.py @@ -1,9 +1,13 @@ from __future__ import annotations -from .utils import gen_adata +from .utils import gen_adata, get_peak_mem, sedate + + +def peakmem_garbage_collection(): + for _ in range(10): + adata = gen_adata(10000, 10000, "X-csc") # noqa: F841 class GarbargeCollectionSuite: - def peakmem_garbage_collection(self, *_): - for _ in range(10): - adata = gen_adata(10000, 10000, "X-csc") # noqa: F841 + def track_peakmem_write_compressed(self, *_): + return get_peak_mem((sedate(peakmem_garbage_collection), (), {})) From ce4e2d877e5e6d620496c7adecc8e8842eace07f Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 5 Jun 2024 14:14:51 +0200 Subject: [PATCH 10/33] (fix): remove `track_` name --- benchmarks/benchmarks/anndata.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/benchmarks/anndata.py b/benchmarks/benchmarks/anndata.py index 7f33e9bac..20eb02d4d 100644 --- a/benchmarks/benchmarks/anndata.py +++ b/benchmarks/benchmarks/anndata.py @@ -3,11 +3,11 @@ from .utils import gen_adata, get_peak_mem, sedate -def peakmem_garbage_collection(): +def run_garbage_collection(): for _ in range(10): adata = gen_adata(10000, 10000, "X-csc") # noqa: F841 class GarbargeCollectionSuite: def track_peakmem_write_compressed(self, *_): - return get_peak_mem((sedate(peakmem_garbage_collection), (), {})) + return get_peak_mem((sedate(run_garbage_collection), (), {})) From 0c9e563f6dab6af3a55512b37db293ebc40907b9 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 5 Jun 2024 14:49:11 +0200 Subject: [PATCH 11/33] (fix): docs --- src/anndata/_core/anndata.py | 134 +++++++++++++++++++++++++++++++++-- 1 file changed, 129 insertions(+), 5 deletions(-) diff --git a/src/anndata/_core/anndata.py b/src/anndata/_core/anndata.py index 823987703..dca8cf48a 100644 --- a/src/anndata/_core/anndata.py +++ b/src/anndata/_core/anndata.py @@ -46,8 +46,11 @@ from .aligned_mapping import ( AlignedMappingProperty, AxisArrays, + AxisArraysView, Layers, + LayersView, PairwiseArrays, + PairwiseArraysView, ) from .file_backing import AnnDataFileManager, to_memory from .index import Index, Index1D, _normalize_indices, _subset, get_vector @@ -667,11 +670,132 @@ def X(self, value: np.ndarray | sparse.spmatrix | SpArray | None): def X(self): self.X = None - obsm = AlignedMappingProperty("obsm", AxisArrays, 0) - varm = AlignedMappingProperty("varm", AxisArrays, 1) - layers = AlignedMappingProperty("layers", Layers, (0, 1)) - obsp = AlignedMappingProperty("obsp", PairwiseArrays, 0) - varp = AlignedMappingProperty("varp", PairwiseArrays, 1) + __obsm__alignedmapping_property = AlignedMappingProperty("obsm", AxisArrays, 0) + + @property + def obsm(self) -> AxisArrays | AxisArraysView: + """\ + Multi-dimensional annotation of observations + (mutable structured :class:`~numpy.ndarray`). + + Stores for each key a two or higher-dimensional :class:`~numpy.ndarray` + of length `n_obs`. + Is sliced with `data` and `obs` but behaves otherwise like a :term:`mapping`. + """ + return self.__obsm__alignedmapping_property + + @obsm.setter + def obsm(self, val): + self.__obsm__alignedmapping_property = val + + @obsm.deleter + def obsm(self): + del self.__obsm__alignedmapping_property + + __varm__alignedmapping_property = AlignedMappingProperty("varm", AxisArrays, 1) + + @property + def varm(self) -> AxisArrays | AxisArraysView: + """\ + Multi-dimensional annotation of variables/features + (mutable structured :class:`~numpy.ndarray`). + + Stores for each key a two or higher-dimensional :class:`~numpy.ndarray` + of length `n_vars`. + Is sliced with `data` and `var` but behaves otherwise like a :term:`mapping`. + """ + return self.__varm__alignedmapping_property + + @varm.setter + def varm(self, val): + self.__varm__alignedmapping_property = val + + @varm.deleter + def varm(self): + del self.__varm__alignedmapping_property + + __layers__alignedmapping_property = AlignedMappingProperty("layers", Layers, (0, 1)) + + @property + def layers(self) -> Layers | LayersView: + """\ + Dictionary-like object with values of the same dimensions as :attr:`X`. + + Layers in AnnData are inspired by loompy’s :ref:`loomlayers`. + + Return the layer named `"unspliced"`:: + + adata.layers["unspliced"] + + Create or replace the `"spliced"` layer:: + + adata.layers["spliced"] = ... + + Assign the 10th column of layer `"spliced"` to the variable a:: + + a = adata.layers["spliced"][:, 10] + + Delete the `"spliced"` layer:: + + del adata.layers["spliced"] + + Return layers’ names:: + + adata.layers.keys() + """ + return self.__layers__alignedmapping_property + + @layers.setter + def layers(self, val): + self.__layers__alignedmapping_property = val + + @layers.deleter + def layers(self): + del self.__layers__alignedmapping_property + + __obsp__alignedmapping_property = AlignedMappingProperty("obsp", PairwiseArrays, 0) + + @property + def obsp(self) -> PairwiseArrays | PairwiseArraysView: + """\ + Pairwise annotation of observations, + a mutable mapping with array-like values. + + Stores for each key a two or higher-dimensional :class:`~numpy.ndarray` + whose first two dimensions are of length `n_obs`. + Is sliced with `data` and `obs` but behaves otherwise like a :term:`mapping`. + """ + return self.__obsp__alignedmapping_property + + @obsp.setter + def obsp(self, val): + self.__obsp__alignedmapping_property = val + + @obsp.deleter + def obsp(self): + del self.__obsp__alignedmapping_property + + __varp__alignedmapping_property = AlignedMappingProperty("varp", PairwiseArrays, 1) + + @property + def varp(self) -> PairwiseArrays | PairwiseArraysView: + """\ + Pairwise annotation of variables/features, + a mutable mapping with array-like values. + + Stores for each key a two or higher-dimensional :class:`~numpy.ndarray` + whose first two dimensions are of length `n_var`. + Is sliced with `data` and `var` but behaves otherwise like a :term:`mapping`. + """ + return self.__varp__alignedmapping_property + + @varp.setter + def varp(self, val): + self.__varp__alignedmapping_property = val + + @varp.deleter + def varp(self): + del self.__varp__alignedmapping_property @property def raw(self) -> Raw: From c971dfbc318bbf06b49dd1b014ff17889bcd98ed Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 5 Jun 2024 15:29:35 +0200 Subject: [PATCH 12/33] (fix): do custom peakmem track --- benchmarks/benchmarks/anndata.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/benchmarks/benchmarks/anndata.py b/benchmarks/benchmarks/anndata.py index 20eb02d4d..a38256cd8 100644 --- a/benchmarks/benchmarks/anndata.py +++ b/benchmarks/benchmarks/anndata.py @@ -1,13 +1,33 @@ from __future__ import annotations -from .utils import gen_adata, get_peak_mem, sedate +import tracemalloc +import numpy as np -def run_garbage_collection(): - for _ in range(10): - adata = gen_adata(10000, 10000, "X-csc") # noqa: F841 +from .utils import gen_adata class GarbargeCollectionSuite: + runs = 10 + + # https://github.com/pythonprofilers/memory_profiler/issues/402 and other backend does not pick this up def track_peakmem_write_compressed(self, *_): - return get_peak_mem((sedate(run_garbage_collection), (), {})) + def display_top(snapshot, key_type="lineno"): + snapshot = snapshot.filter_traces( + ( + tracemalloc.Filter(False, ""), + tracemalloc.Filter(False, ""), + ) + ) + top_stats = snapshot.statistics(key_type) + total = sum(stat.size for stat in top_stats) + return total + + total = np.zeros(self.n) + tracemalloc.start() + for i in range(self.n): + data = gen_adata(10000, 10000, "X-csc") # noqa: F841 + snapshot = tracemalloc.take_snapshot() + total[i] = display_top(snapshot) + tracemalloc.stop() + return max(total) From 9e08151b4839d74c561dddc6527a52bf9a8386aa Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 5 Jun 2024 15:33:44 +0200 Subject: [PATCH 13/33] (fix): `n` -> `runs` --- benchmarks/benchmarks/anndata.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/benchmarks/anndata.py b/benchmarks/benchmarks/anndata.py index a38256cd8..9a0e9a712 100644 --- a/benchmarks/benchmarks/anndata.py +++ b/benchmarks/benchmarks/anndata.py @@ -23,9 +23,9 @@ def display_top(snapshot, key_type="lineno"): total = sum(stat.size for stat in top_stats) return total - total = np.zeros(self.n) + total = np.zeros(self.runs) tracemalloc.start() - for i in range(self.n): + for i in range(self.runs): data = gen_adata(10000, 10000, "X-csc") # noqa: F841 snapshot = tracemalloc.take_snapshot() total[i] = display_top(snapshot) From c7710793b5bd685d3cffd010e5e2ea47f383b7c1 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 5 Jun 2024 15:58:24 +0200 Subject: [PATCH 14/33] (fix): comment --- benchmarks/benchmarks/anndata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/benchmarks/anndata.py b/benchmarks/benchmarks/anndata.py index 9a0e9a712..4c45917a8 100644 --- a/benchmarks/benchmarks/anndata.py +++ b/benchmarks/benchmarks/anndata.py @@ -10,7 +10,7 @@ class GarbargeCollectionSuite: runs = 10 - # https://github.com/pythonprofilers/memory_profiler/issues/402 and other backend does not pick this up + # custom because `memory_profiler` is a line-by-line profiler (also: https://github.com/pythonprofilers/memory_profiler/issues/402) def track_peakmem_write_compressed(self, *_): def display_top(snapshot, key_type="lineno"): snapshot = snapshot.filter_traces( From 9d3caad382e57b3048e91d1985adf733b1c6190a Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 5 Jun 2024 16:18:24 +0200 Subject: [PATCH 15/33] (fix): `vals` typing for aligned mappings --- src/anndata/_core/aligned_mapping.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/anndata/_core/aligned_mapping.py b/src/anndata/_core/aligned_mapping.py index 0869cd900..e345238ee 100644 --- a/src/anndata/_core/aligned_mapping.py +++ b/src/anndata/_core/aligned_mapping.py @@ -284,7 +284,7 @@ def __init__( self, parent: AnnData | Raw, axis: int, - vals: Mapping | AxisArraysBase | None = None, + vals: dict[str, V] | AxisArraysBase = {}, ): self._parent = parent if axis not in (0, 1): @@ -332,7 +332,7 @@ def copy(self) -> Layers: class Layers(AlignedActualMixin, LayersBase): def __init__( - self, parent: AnnData, axis: tuple[int] = (0, 1), vals: Mapping | None = None + self, parent: AnnData, axis: tuple[int] = (0, 1), vals: dict[str, V] = {} ): assert axis == (0, 1), axis self._parent = parent @@ -386,7 +386,7 @@ def __init__( self, parent: AnnData, axis: int, - vals: Mapping | None = None, + vals: dict[str, V] = {}, ): self._parent = parent if axis not in (0, 1): From eeb8865d09447e111caaaae32f8d9719273d3309 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 5 Jun 2024 16:25:32 +0200 Subject: [PATCH 16/33] (fix): don't use mutable argument --- src/anndata/_core/aligned_mapping.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/anndata/_core/aligned_mapping.py b/src/anndata/_core/aligned_mapping.py index e345238ee..33638d449 100644 --- a/src/anndata/_core/aligned_mapping.py +++ b/src/anndata/_core/aligned_mapping.py @@ -284,12 +284,14 @@ def __init__( self, parent: AnnData | Raw, axis: int, - vals: dict[str, V] | AxisArraysBase = {}, + vals: dict[str, V] | AxisArraysBase | None = None, ): self._parent = parent if axis not in (0, 1): raise ValueError() self._axis = axis + if vals is None: + vals = {} for k, v in vals.items(): vals[k] = self._validate_value(v, k) self._data = vals @@ -332,10 +334,15 @@ def copy(self) -> Layers: class Layers(AlignedActualMixin, LayersBase): def __init__( - self, parent: AnnData, axis: tuple[int] = (0, 1), vals: dict[str, V] = {} + self, + parent: AnnData, + axis: tuple[int] = (0, 1), + vals: dict[str, V] | None = None, ): assert axis == (0, 1), axis self._parent = parent + if vals is None: + vals = {} for k, v in vals.items(): vals[k] = self._validate_value(v, k) self._data = vals @@ -386,12 +393,14 @@ def __init__( self, parent: AnnData, axis: int, - vals: dict[str, V] = {}, + vals: dict[str, V] | None = None, ): self._parent = parent if axis not in (0, 1): raise ValueError() self._axis = axis + if vals is None: + vals = {} for k, v in vals.items(): vals[k] = self._validate_value(v, k) self._data = vals From 5267c2dc006343d7bc2e8e15c5e52bb846e48328 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 5 Jun 2024 18:02:29 +0200 Subject: [PATCH 17/33] (fix): change profiler name to `track_peakmem_garbage_collection` --- benchmarks/benchmarks/anndata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/benchmarks/anndata.py b/benchmarks/benchmarks/anndata.py index 4c45917a8..15fb485a2 100644 --- a/benchmarks/benchmarks/anndata.py +++ b/benchmarks/benchmarks/anndata.py @@ -11,7 +11,7 @@ class GarbargeCollectionSuite: runs = 10 # custom because `memory_profiler` is a line-by-line profiler (also: https://github.com/pythonprofilers/memory_profiler/issues/402) - def track_peakmem_write_compressed(self, *_): + def track_peakmem_garbage_collection(self, *_): def display_top(snapshot, key_type="lineno"): snapshot = snapshot.filter_traces( ( From 13d134a01cd1084f13c03f963960ab4ae280147c Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 5 Jun 2024 18:05:42 +0200 Subject: [PATCH 18/33] (chore): delete rogue comment --- src/anndata/_core/raw.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/anndata/_core/raw.py b/src/anndata/_core/raw.py index 3607a7176..c426cca99 100644 --- a/src/anndata/_core/raw.py +++ b/src/anndata/_core/raw.py @@ -52,7 +52,6 @@ def __init__( self._X = adata.X.copy() self._var = adata.var.copy() self.varm = adata.varm.copy() - # self._varm = AxisArrays(self, 1, adata.varm.copy()) elif adata.isbacked: raise ValueError("Cannot specify X if adata is backed") From 4dec616a63c73d63448ef1390bfb58e85f01d4ea Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Fri, 7 Jun 2024 15:40:46 +0200 Subject: [PATCH 19/33] clarify reference use --- src/anndata/_core/aligned_mapping.py | 48 +++++++++---------- .../multi_files/_anncollection.py | 4 +- 2 files changed, 25 insertions(+), 27 deletions(-) diff --git a/src/anndata/_core/aligned_mapping.py b/src/anndata/_core/aligned_mapping.py index 33638d449..b9531583f 100644 --- a/src/anndata/_core/aligned_mapping.py +++ b/src/anndata/_core/aligned_mapping.py @@ -3,7 +3,7 @@ import warnings from abc import ABC, abstractmethod from collections import abc as cabc -from collections.abc import Iterator, Mapping, Sequence +from collections.abc import Iterator, Mapping, MutableMapping, Sequence from copy import copy from typing import ( TYPE_CHECKING, @@ -121,7 +121,7 @@ def parent(self) -> AnnData | Raw: return self._parent def copy(self): - d = self._actual_class(self.parent, self._axis, {}) + d = self._actual_class(self.parent, axis=self._axis, store={}) for k, v in self.items(): if isinstance(v, AwkArray): # Shallow copy since awkward array buffers are immutable @@ -193,7 +193,7 @@ def __len__(self) -> int: class AlignedActualMixin: - _data: dict[str, V] + _data: MutableMapping[str, V] """Underlying mapping to the data""" is_view = False @@ -278,23 +278,21 @@ def dim_names(self) -> pd.Index: return (self.parent.obs_names, self.parent.var_names)[self._axis] -# TODO: vals can't be None class AxisArrays(AlignedActualMixin, AxisArraysBase): def __init__( self, parent: AnnData | Raw, + *, + store: MutableMapping[str, V] | AxisArraysBase, axis: int, - vals: dict[str, V] | AxisArraysBase | None = None, ): self._parent = parent if axis not in (0, 1): raise ValueError() self._axis = axis - if vals is None: - vals = {} - for k, v in vals.items(): - vals[k] = self._validate_value(v, k) - self._data = vals + self._data = store + for k, v in self._data.items(): + self._data[k] = self._validate_value(v, k) class AxisArraysView(AlignedViewMixin, AxisArraysBase): @@ -326,7 +324,7 @@ class LayersBase(AlignedMapping): # TODO: I thought I had a more elegant solution to overriding this... def copy(self) -> Layers: - d = self._actual_class(self.parent, vals={}) + d = self._actual_class(self.parent, store={}) for k, v in self.items(): d[k] = v.copy() return d @@ -336,16 +334,15 @@ class Layers(AlignedActualMixin, LayersBase): def __init__( self, parent: AnnData, - axis: tuple[int] = (0, 1), - vals: dict[str, V] | None = None, + *, + axis: tuple[int, int] = (0, 1), + store: MutableMapping[str, V], ): assert axis == (0, 1), axis self._parent = parent - if vals is None: - vals = {} - for k, v in vals.items(): - vals[k] = self._validate_value(v, k) - self._data = vals + self._data = store + for k, v in self._data.items(): + self._data[k] = self._validate_value(v, k) class LayersView(AlignedViewMixin, LayersBase): @@ -392,18 +389,17 @@ class PairwiseArrays(AlignedActualMixin, PairwiseArraysBase): def __init__( self, parent: AnnData, + *, axis: int, - vals: dict[str, V] | None = None, + store: MutableMapping[str, V], ): self._parent = parent if axis not in (0, 1): raise ValueError() self._axis = axis - if vals is None: - vals = {} - for k, v in vals.items(): - vals[k] = self._validate_value(v, k) - self._data = vals + self._data = store + for k, v in self._data.items(): + self._data[k] = self._validate_value(v, k) class PairwiseArraysView(AlignedViewMixin, PairwiseArraysBase): @@ -440,11 +436,11 @@ def __get__(self, obj, objtype=None): obj, tuple(idxs[ax] for ax in parent_aligned_mapping.axes) ) else: - return self.cls(obj, self.axis, getattr(obj, "_" + self.name)) + return self.cls(obj, axis=self.axis, store=getattr(obj, "_" + self.name)) def __set__(self, obj, value): value = convert_to_dict(value) - _ = self.cls(obj, self.axis, value) # Validate + _ = self.cls(obj, axis=self.axis, store=value) # Validate if obj.is_view: obj._init_as_actual(obj.copy()) setattr(obj, "_" + self.name, value) diff --git a/src/anndata/experimental/multi_files/_anncollection.py b/src/anndata/experimental/multi_files/_anncollection.py index 09533522a..c8bd91923 100644 --- a/src/anndata/experimental/multi_files/_anncollection.py +++ b/src/anndata/experimental/multi_files/_anncollection.py @@ -758,7 +758,9 @@ def __init__( self._obsm = inner_concat_aligned_mapping( [a.obsm for a in adatas], index=self.obs_names ) - self._obsm = AxisArrays(self, axis=0) if self._obsm == {} else self._obsm + self._obsm = ( + AxisArrays(self, axis=0, store={}) if self._obsm == {} else self._obsm + ) # process inner join of views self._view_attrs_keys = {} From 5361188695b5cefaefacbe36be087a9a461bab5d Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Fri, 7 Jun 2024 15:46:17 +0200 Subject: [PATCH 20/33] consistency --- src/anndata/_core/aligned_mapping.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/anndata/_core/aligned_mapping.py b/src/anndata/_core/aligned_mapping.py index b9531583f..39615617e 100644 --- a/src/anndata/_core/aligned_mapping.py +++ b/src/anndata/_core/aligned_mapping.py @@ -283,8 +283,8 @@ def __init__( self, parent: AnnData | Raw, *, - store: MutableMapping[str, V] | AxisArraysBase, axis: int, + store: MutableMapping[str, V] | AxisArraysBase, ): self._parent = parent if axis not in (0, 1): From b61549da3ab2c4a712cad696cbcc2ecac6097e39 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Fri, 7 Jun 2024 16:34:29 +0200 Subject: [PATCH 21/33] remove boilerplate --- src/anndata/_core/aligned_mapping.py | 6 +- src/anndata/_core/anndata.py | 164 ++++++++------------------- 2 files changed, 51 insertions(+), 119 deletions(-) diff --git a/src/anndata/_core/aligned_mapping.py b/src/anndata/_core/aligned_mapping.py index 39615617e..c349aaa03 100644 --- a/src/anndata/_core/aligned_mapping.py +++ b/src/anndata/_core/aligned_mapping.py @@ -419,15 +419,15 @@ def __init__( PairwiseArraysBase._actual_class = PairwiseArrays -class AlignedMappingProperty: +class AlignedMappingProperty(property): def __init__(self, name, cls, axis): self.name = name self.axis = axis self.cls = cls def __get__(self, obj, objtype=None): - if obj is None: # None check needed for AnnData.layers accessors - return self.cls + if obj is None: # needs to return a `property`, e.g. for Sphinx + return self if obj.is_view: parent_anndata = obj._adata_ref idxs = (obj._oidx, obj._vidx) diff --git a/src/anndata/_core/anndata.py b/src/anndata/_core/anndata.py index dca8cf48a..b76b19131 100644 --- a/src/anndata/_core/anndata.py +++ b/src/anndata/_core/anndata.py @@ -43,15 +43,7 @@ from ..utils import axis_len, deprecated, ensure_df_homogeneous from .access import ElementRef from .aligned_df import _gen_dataframe -from .aligned_mapping import ( - AlignedMappingProperty, - AxisArrays, - AxisArraysView, - Layers, - LayersView, - PairwiseArrays, - PairwiseArraysView, -) +from .aligned_mapping import AlignedMappingProperty, AxisArrays, Layers, PairwiseArrays from .file_backing import AnnDataFileManager, to_memory from .index import Index, Index1D, _normalize_indices, _subset, get_vector from .raw import Raw @@ -670,132 +662,72 @@ def X(self, value: np.ndarray | sparse.spmatrix | SpArray | None): def X(self): self.X = None - __obsm__alignedmapping_property = AlignedMappingProperty("obsm", AxisArrays, 0) - - @property - def obsm(self) -> AxisArrays | AxisArraysView: - """\ - Multi-dimensional annotation of observations - (mutable structured :class:`~numpy.ndarray`). - - Stores for each key a two or higher-dimensional :class:`~numpy.ndarray` - of length `n_obs`. - Is sliced with `data` and `obs` but behaves otherwise like a :term:`mapping`. - """ - return self.__obsm__alignedmapping_property - - @obsm.setter - def obsm(self, val): - self.__obsm__alignedmapping_property = val - - @obsm.deleter - def obsm(self): - del self.__obsm__alignedmapping_property - - __varm__alignedmapping_property = AlignedMappingProperty("varm", AxisArrays, 1) - - @property - def varm(self) -> AxisArrays | AxisArraysView: - """\ - Multi-dimensional annotation of variables/features - (mutable structured :class:`~numpy.ndarray`). - - Stores for each key a two or higher-dimensional :class:`~numpy.ndarray` - of length `n_vars`. - Is sliced with `data` and `var` but behaves otherwise like a :term:`mapping`. - """ - return self.__varm__alignedmapping_property - - @varm.setter - def varm(self, val): - self.__varm__alignedmapping_property = val - - @varm.deleter - def varm(self): - del self.__varm__alignedmapping_property - - __layers__alignedmapping_property = AlignedMappingProperty("layers", Layers, (0, 1)) - - @property - def layers(self) -> Layers | LayersView: - """\ - Dictionary-like object with values of the same dimensions as :attr:`X`. - - Layers in AnnData are inspired by loompy’s :ref:`loomlayers`. - - Return the layer named `"unspliced"`:: - - adata.layers["unspliced"] - - Create or replace the `"spliced"` layer:: + obsm = AlignedMappingProperty("obsm", AxisArrays, 0) + """\ + Multi-dimensional annotation of observations + (mutable structured :class:`~numpy.ndarray`). - adata.layers["spliced"] = ... + Stores for each key a two or higher-dimensional :class:`~numpy.ndarray` + of length `n_obs`. + Is sliced with `data` and `obs` but behaves otherwise like a :term:`mapping`. + """ - Assign the 10th column of layer `"spliced"` to the variable a:: + varm = AlignedMappingProperty("varm", AxisArrays, 1) + """\ + Multi-dimensional annotation of variables/features + (mutable structured :class:`~numpy.ndarray`). - a = adata.layers["spliced"][:, 10] + Stores for each key a two or higher-dimensional :class:`~numpy.ndarray` + of length `n_vars`. + Is sliced with `data` and `var` but behaves otherwise like a :term:`mapping`. + """ - Delete the `"spliced"` layer:: + layers = AlignedMappingProperty("layers", Layers, (0, 1)) + """\ + Dictionary-like object with values of the same dimensions as :attr:`X`. - del adata.layers["spliced"] + Layers in AnnData are inspired by loompy’s :ref:`loomlayers`. - Return layers’ names:: + Return the layer named `"unspliced"`:: - adata.layers.keys() - """ - return self.__layers__alignedmapping_property + adata.layers["unspliced"] - @layers.setter - def layers(self, val): - self.__layers__alignedmapping_property = val + Create or replace the `"spliced"` layer:: - @layers.deleter - def layers(self): - del self.__layers__alignedmapping_property + adata.layers["spliced"] = ... - __obsp__alignedmapping_property = AlignedMappingProperty("obsp", PairwiseArrays, 0) + Assign the 10th column of layer `"spliced"` to the variable a:: - @property - def obsp(self) -> PairwiseArrays | PairwiseArraysView: - """\ - Pairwise annotation of observations, - a mutable mapping with array-like values. + a = adata.layers["spliced"][:, 10] - Stores for each key a two or higher-dimensional :class:`~numpy.ndarray` - whose first two dimensions are of length `n_obs`. - Is sliced with `data` and `obs` but behaves otherwise like a :term:`mapping`. - """ - return self.__obsp__alignedmapping_property + Delete the `"spliced"` layer:: - @obsp.setter - def obsp(self, val): - self.__obsp__alignedmapping_property = val + del adata.layers["spliced"] - @obsp.deleter - def obsp(self): - del self.__obsp__alignedmapping_property + Return layers’ names:: - __varp__alignedmapping_property = AlignedMappingProperty("varp", PairwiseArrays, 1) + adata.layers.keys() + """ - @property - def varp(self) -> PairwiseArrays | PairwiseArraysView: - """\ - Pairwise annotation of variables/features, - a mutable mapping with array-like values. + obsp = AlignedMappingProperty("obsp", PairwiseArrays, 0) + """\ + Pairwise annotation of observations, + a mutable mapping with array-like values. - Stores for each key a two or higher-dimensional :class:`~numpy.ndarray` - whose first two dimensions are of length `n_var`. - Is sliced with `data` and `var` but behaves otherwise like a :term:`mapping`. - """ - return self.__varp__alignedmapping_property + Stores for each key a two or higher-dimensional :class:`~numpy.ndarray` + whose first two dimensions are of length `n_obs`. + Is sliced with `data` and `obs` but behaves otherwise like a :term:`mapping`. + """ - @varp.setter - def varp(self, val): - self.__varp__alignedmapping_property = val + varp = AlignedMappingProperty("varp", PairwiseArrays, 1) + """\ + Pairwise annotation of variables/features, + a mutable mapping with array-like values. - @varp.deleter - def varp(self): - del self.__varp__alignedmapping_property + Stores for each key a two or higher-dimensional :class:`~numpy.ndarray` + whose first two dimensions are of length `n_var`. + Is sliced with `data` and `var` but behaves otherwise like a :term:`mapping`. + """ @property def raw(self) -> Raw: From 1a8549741c0306fc20dbd5eaffe7f6efbf971dc0 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Fri, 7 Jun 2024 17:11:32 +0200 Subject: [PATCH 22/33] add types, undo moves --- src/anndata/_core/aligned_mapping.py | 39 ++++++---- src/anndata/_core/anndata.py | 106 +++++++++++++-------------- 2 files changed, 76 insertions(+), 69 deletions(-) diff --git a/src/anndata/_core/aligned_mapping.py b/src/anndata/_core/aligned_mapping.py index c349aaa03..37edfc4c5 100644 --- a/src/anndata/_core/aligned_mapping.py +++ b/src/anndata/_core/aligned_mapping.py @@ -3,19 +3,17 @@ import warnings from abc import ABC, abstractmethod from collections import abc as cabc -from collections.abc import Iterator, Mapping, MutableMapping, Sequence from copy import copy from typing import ( TYPE_CHECKING, ClassVar, + Generic, Literal, TypeVar, Union, ) -import numpy as np import pandas as pd -from scipy.sparse import spmatrix from anndata._warnings import ExperimentalFeatureWarning, ImplicitModificationWarning from anndata.compat import AwkArray @@ -32,16 +30,23 @@ from .views import as_view, view_update if TYPE_CHECKING: + from collections.abc import Iterable, Iterator, Mapping, MutableMapping, Sequence + + import numpy as np + from scipy.sparse import spmatrix + from .anndata import AnnData from .raw import Raw + OneDIdx = Union[Sequence[int], Sequence[bool], slice] + TwoDIdx = tuple[OneDIdx, OneDIdx] -OneDIdx = Union[Sequence[int], Sequence[bool], slice] -TwoDIdx = tuple[OneDIdx, OneDIdx] + I = TypeVar("I", OneDIdx, TwoDIdx, covariant=True) + # TODO: pd.DataFrame only allowed in AxisArrays? + V = Union[pd.DataFrame, spmatrix, np.ndarray] -I = TypeVar("I", OneDIdx, TwoDIdx, covariant=True) -# TODO: pd.DataFrame only allowed in AxisArrays? -V = Union[pd.DataFrame, spmatrix, np.ndarray] +# Used in `Generic[T]` +T = TypeVar("T") class AlignedMapping(cabc.MutableMapping, ABC): @@ -335,7 +340,7 @@ def __init__( self, parent: AnnData, *, - axis: tuple[int, int] = (0, 1), + axis: tuple[Literal[0], Literal[1]] = (0, 1), store: MutableMapping[str, V], ): assert axis == (0, 1), axis @@ -419,15 +424,17 @@ def __init__( PairwiseArraysBase._actual_class = PairwiseArrays -class AlignedMappingProperty(property): - def __init__(self, name, cls, axis): +class AlignedMappingProperty(property, Generic[T]): + def __init__( + self, name: str, cls: T, axis: Literal[0, 1] | tuple[Literal[0], Literal[1]] + ): self.name = name self.axis = axis self.cls = cls - def __get__(self, obj, objtype=None): + def __get__(self, obj: None | AnnData, objtype: type | None = None) -> T: if obj is None: # needs to return a `property`, e.g. for Sphinx - return self + return self # type: ignore if obj.is_view: parent_anndata = obj._adata_ref idxs = (obj._oidx, obj._vidx) @@ -438,12 +445,14 @@ def __get__(self, obj, objtype=None): else: return self.cls(obj, axis=self.axis, store=getattr(obj, "_" + self.name)) - def __set__(self, obj, value): + def __set__( + self, obj: AnnData, value: Mapping[str, V] | Iterable[tuple[str, V]] + ) -> None: value = convert_to_dict(value) _ = self.cls(obj, axis=self.axis, store=value) # Validate if obj.is_view: obj._init_as_actual(obj.copy()) setattr(obj, "_" + self.name, value) - def __delete__(self, obj): + def __delete__(self, obj) -> None: setattr(obj, self.name, dict()) diff --git a/src/anndata/_core/anndata.py b/src/anndata/_core/anndata.py index b76b19131..5ada0beb0 100644 --- a/src/anndata/_core/anndata.py +++ b/src/anndata/_core/anndata.py @@ -662,26 +662,6 @@ def X(self, value: np.ndarray | sparse.spmatrix | SpArray | None): def X(self): self.X = None - obsm = AlignedMappingProperty("obsm", AxisArrays, 0) - """\ - Multi-dimensional annotation of observations - (mutable structured :class:`~numpy.ndarray`). - - Stores for each key a two or higher-dimensional :class:`~numpy.ndarray` - of length `n_obs`. - Is sliced with `data` and `obs` but behaves otherwise like a :term:`mapping`. - """ - - varm = AlignedMappingProperty("varm", AxisArrays, 1) - """\ - Multi-dimensional annotation of variables/features - (mutable structured :class:`~numpy.ndarray`). - - Stores for each key a two or higher-dimensional :class:`~numpy.ndarray` - of length `n_vars`. - Is sliced with `data` and `var` but behaves otherwise like a :term:`mapping`. - """ - layers = AlignedMappingProperty("layers", Layers, (0, 1)) """\ Dictionary-like object with values of the same dimensions as :attr:`X`. @@ -709,26 +689,6 @@ def X(self): adata.layers.keys() """ - obsp = AlignedMappingProperty("obsp", PairwiseArrays, 0) - """\ - Pairwise annotation of observations, - a mutable mapping with array-like values. - - Stores for each key a two or higher-dimensional :class:`~numpy.ndarray` - whose first two dimensions are of length `n_obs`. - Is sliced with `data` and `obs` but behaves otherwise like a :term:`mapping`. - """ - - varp = AlignedMappingProperty("varp", PairwiseArrays, 1) - """\ - Pairwise annotation of variables/features, - a mutable mapping with array-like values. - - Stores for each key a two or higher-dimensional :class:`~numpy.ndarray` - whose first two dimensions are of length `n_var`. - Is sliced with `data` and `var` but behaves otherwise like a :term:`mapping`. - """ - @property def raw(self) -> Raw: """\ @@ -910,6 +870,46 @@ def uns(self, value: MutableMapping): def uns(self): self.uns = OrderedDict() + obsm = AlignedMappingProperty("obsm", AxisArrays, 0) + """\ + Multi-dimensional annotation of observations + (mutable structured :class:`~numpy.ndarray`). + + Stores for each key a two or higher-dimensional :class:`~numpy.ndarray` + of length `n_obs`. + Is sliced with `data` and `obs` but behaves otherwise like a :term:`mapping`. + """ + + varm = AlignedMappingProperty("varm", AxisArrays, 1) + """\ + Multi-dimensional annotation of variables/features + (mutable structured :class:`~numpy.ndarray`). + + Stores for each key a two or higher-dimensional :class:`~numpy.ndarray` + of length `n_vars`. + Is sliced with `data` and `var` but behaves otherwise like a :term:`mapping`. + """ + + obsp = AlignedMappingProperty("obsp", PairwiseArrays, 0) + """\ + Pairwise annotation of observations, + a mutable mapping with array-like values. + + Stores for each key a two or higher-dimensional :class:`~numpy.ndarray` + whose first two dimensions are of length `n_obs`. + Is sliced with `data` and `obs` but behaves otherwise like a :term:`mapping`. + """ + + varp = AlignedMappingProperty("varp", PairwiseArrays, 1) + """\ + Pairwise annotation of variables/features, + a mutable mapping with array-like values. + + Stores for each key a two or higher-dimensional :class:`~numpy.ndarray` + whose first two dimensions are of length `n_var`. + Is sliced with `data` and `var` but behaves otherwise like a :term:`mapping`. + """ + def obs_keys(self) -> list[str]: """List keys of observation annotation :attr:`obs`.""" return self._obs.keys().tolist() @@ -920,11 +920,11 @@ def var_keys(self) -> list[str]: def obsm_keys(self) -> list[str]: """List keys of observation annotation :attr:`obsm`.""" - return list(self._obsm.keys()) + return list(self.obsm.keys()) def varm_keys(self) -> list[str]: """List keys of variable annotation :attr:`varm`.""" - return list(self._varm.keys()) + return list(self.varm.keys()) def uns_keys(self) -> list[str]: """List keys of unstructured annotation.""" @@ -1203,10 +1203,10 @@ def transpose(self) -> AnnData: obs=self.var, var=self.obs, uns=self._uns, - obsm=self._varm, - varm=self._obsm, - obsp=self._varp, - varp=self._obsp, + obsm=self.varm, + varm=self.obsm, + obsp=self.varp, + varp=self.obsp, filename=self.filename, ) @@ -1763,24 +1763,22 @@ def _check_dimensions(self, key=None): else: key = {key} if "obsm" in key: - obsm = self._obsm if ( - not all([axis_len(o, 0) == self.n_obs for o in obsm.values()]) - and len(obsm.dim_names) != self.n_obs + not all([axis_len(o, 0) == self.n_obs for o in self.obsm.values()]) + and len(self.obsm.dim_names) != self.n_obs ): raise ValueError( "Observations annot. `obsm` must have number of rows of `X`" - f" ({self.n_obs}), but has {len(obsm)} rows." + f" ({self.n_obs}), but has {len(self.obsm)} rows." ) if "varm" in key: - varm = self._varm if ( - not all([axis_len(v, 0) == self.n_vars for v in varm.values()]) - and len(varm.dim_names) != self.n_vars + not all([axis_len(v, 0) == self.n_vars for v in self.varm.values()]) + and len(self.varm.dim_names) != self.n_vars ): raise ValueError( "Variables annot. `varm` must have number of columns of `X`" - f" ({self.n_vars}), but has {len(varm)} rows." + f" ({self.n_vars}), but has {len(self.varm)} rows." ) def write_h5ad( From f750954cf34d5cbe312a5bf64f45cf47d85c11ba Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Fri, 7 Jun 2024 17:44:56 +0200 Subject: [PATCH 23/33] Add type hints for properties --- src/anndata/_core/aligned_mapping.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/anndata/_core/aligned_mapping.py b/src/anndata/_core/aligned_mapping.py index 37edfc4c5..5a1b83ea9 100644 --- a/src/anndata/_core/aligned_mapping.py +++ b/src/anndata/_core/aligned_mapping.py @@ -45,9 +45,6 @@ # TODO: pd.DataFrame only allowed in AxisArrays? V = Union[pd.DataFrame, spmatrix, np.ndarray] -# Used in `Generic[T]` -T = TypeVar("T") - class AlignedMapping(cabc.MutableMapping, ABC): """\ @@ -424,14 +421,31 @@ def __init__( PairwiseArraysBase._actual_class = PairwiseArrays +T = TypeVar("T", bound=AlignedMapping) + + class AlignedMappingProperty(property, Generic[T]): def __init__( - self, name: str, cls: T, axis: Literal[0, 1] | tuple[Literal[0], Literal[1]] + self, + name: str, + cls: type[T], + axis: Literal[0, 1] | tuple[Literal[0], Literal[1]], ): self.name = name self.axis = axis self.cls = cls + @property + def fget(self) -> cabc.Callable: + """Fake fget for sphinx-autodoc-typehints.""" + + def fake(): ... + + fake.__annotations__ = { + "return": Union[self.cls._actual_class, self.cls._view_class] + } + return fake + def __get__(self, obj: None | AnnData, objtype: type | None = None) -> T: if obj is None: # needs to return a `property`, e.g. for Sphinx return self # type: ignore From 48ae69d0a18d87343c899bfe6d482f4326d487cb Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Tue, 2 Jul 2024 15:46:51 +0200 Subject: [PATCH 24/33] fmt --- src/anndata/_core/aligned_mapping.py | 10 +++++----- src/anndata/_core/raw.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/anndata/_core/aligned_mapping.py b/src/anndata/_core/aligned_mapping.py index 4cbaf8496..7217546f9 100644 --- a/src/anndata/_core/aligned_mapping.py +++ b/src/anndata/_core/aligned_mapping.py @@ -2,7 +2,7 @@ import warnings from abc import ABC, abstractmethod -from collections import abc as cabc +from collections.abc import Callable, Collection, MutableMapping from copy import copy from typing import ( TYPE_CHECKING, @@ -24,7 +24,7 @@ from .views import as_view, view_update if TYPE_CHECKING: - from collections.abc import Iterable, Iterator, Mapping, MutableMapping, Sequence + from collections.abc import Iterable, Iterator, Mapping, Sequence import numpy as np from scipy.sparse import spmatrix @@ -40,7 +40,7 @@ V = Union[pd.DataFrame, spmatrix, np.ndarray] -class AlignedMapping(cabc.MutableMapping, ABC): +class AlignedMapping(MutableMapping, ABC): """\ An abstract base class for Mappings containing array-like values aligned to either one or both AnnData axes. @@ -253,7 +253,7 @@ def to_df(self) -> pd.DataFrame: def _validate_value(self, val: V, key: str) -> V: if ( hasattr(val, "index") - and isinstance(val.index, cabc.Collection) + and isinstance(val.index, Collection) and not val.index.equals(self.dim_names) ): # Could probably also re-order index if it’s contained @@ -428,7 +428,7 @@ def __init__( self.cls = cls @property - def fget(self) -> cabc.Callable: + def fget(self) -> Callable: """Fake fget for sphinx-autodoc-typehints.""" def fake(): ... diff --git a/src/anndata/_core/raw.py b/src/anndata/_core/raw.py index c426cca99..d805afdb3 100644 --- a/src/anndata/_core/raw.py +++ b/src/anndata/_core/raw.py @@ -43,7 +43,7 @@ def __init__( self._var = _gen_dataframe( var, ["var_names"], source="X", attr="var", length=n_var ) - self._varm = varm + self.varm = varm elif X is None: # construct from adata # Move from GPU to CPU since it's large and not always used if isinstance(adata.X, (CupyArray, CupySparseMatrix)): From 480b0a41649429fb5b5d61ed0fb47bb826b38f1a Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Tue, 2 Jul 2024 16:48:17 +0200 Subject: [PATCH 25/33] cleanup --- src/anndata/_core/aligned_mapping.py | 54 ++++++++++++---------------- src/anndata/_core/anndata.py | 2 +- 2 files changed, 24 insertions(+), 32 deletions(-) diff --git a/src/anndata/_core/aligned_mapping.py b/src/anndata/_core/aligned_mapping.py index 7217546f9..dacc8051d 100644 --- a/src/anndata/_core/aligned_mapping.py +++ b/src/anndata/_core/aligned_mapping.py @@ -97,18 +97,15 @@ def _validate_value(self, val: V, key: str) -> V: @abstractmethod def attrname(self) -> str: """What attr for the AnnData is this?""" - pass @property @abstractmethod def axes(self) -> tuple[Literal[0, 1], ...]: """Which axes of the parent is this aligned to?""" - pass @property @abstractmethod - def is_view(self) -> bool: - pass + def is_view(self) -> bool: ... @property def parent(self) -> AnnData | Raw: @@ -221,6 +218,8 @@ class AxisArraysBase(AlignedMapping): _allow_df = True _dimnames = ("obs", "var") + _axis: Literal[0, 1] + @property def attrname(self) -> str: return f"{self.dim}m" @@ -235,12 +234,6 @@ def dim(self) -> str: """Name of the dimension this aligned to.""" return self._dimnames[self._axis] - def flipped(self) -> AxisArraysBase: - """Transpose.""" - new = self.copy() - new.dimension = abs(self._axis - 1) - return new - def to_df(self) -> pd.DataFrame: """Convert to pandas dataframe.""" df = pd.DataFrame(index=self.dim_names) @@ -277,7 +270,7 @@ def __init__( self, parent: AnnData | Raw, *, - axis: int, + axis: Literal[0, 1], store: MutableMapping[str, V] | AxisArraysBase, ): self._parent = parent @@ -325,14 +318,7 @@ def copy(self) -> Layers: class Layers(AlignedActualMixin, LayersBase): - def __init__( - self, - parent: AnnData, - *, - axis: tuple[Literal[0], Literal[1]] = (0, 1), - store: MutableMapping[str, V], - ): - assert axis == (0, 1), axis + def __init__(self, parent: AnnData, *, store: MutableMapping[str, V]): self._parent = parent self._data = store for k, v in self._data.items(): @@ -364,6 +350,8 @@ class PairwiseArraysBase(AlignedMapping): _allow_df = False _dimnames = ("obs", "var") + _axis: Literal[0, 1] + @property def attrname(self) -> str: return f"{self.dim}p" @@ -384,7 +372,7 @@ def __init__( self, parent: AnnData, *, - axis: int, + axis: Literal[0, 1], store: MutableMapping[str, V], ): self._parent = parent @@ -421,12 +409,17 @@ def __init__( self, name: str, cls: type[T], - axis: Literal[0, 1] | tuple[Literal[0], Literal[1]], + axis: Literal[0, 1] | None = None, ): self.name = name self.axis = axis self.cls = cls + def construct(self, obj: AnnData, store: MutableMapping[str, V]) -> T: + if self.axis is None: + return self.cls(obj, store=store) + return self.cls(obj, axis=self.axis, store=store) + @property def fget(self) -> Callable: """Fake fget for sphinx-autodoc-typehints.""" @@ -441,21 +434,20 @@ def fake(): ... def __get__(self, obj: None | AnnData, objtype: type | None = None) -> T: if obj is None: # needs to return a `property`, e.g. for Sphinx return self # type: ignore - if obj.is_view: - parent_anndata = obj._adata_ref - idxs = (obj._oidx, obj._vidx) - parent_aligned_mapping = getattr(parent_anndata, self.name) - return parent_aligned_mapping._view( - obj, tuple(idxs[ax] for ax in parent_aligned_mapping.axes) - ) - else: - return self.cls(obj, axis=self.axis, store=getattr(obj, "_" + self.name)) + if not obj.is_view: + return self.construct(obj, getattr(obj, f"_{self.name}")) + parent_anndata = obj._adata_ref + idxs = (obj._oidx, obj._vidx) + parent_aligned_mapping: AlignedMapping = getattr(parent_anndata, self.name) + return parent_aligned_mapping._view( + obj, tuple(idxs[ax] for ax in parent_aligned_mapping.axes) + ) def __set__( self, obj: AnnData, value: Mapping[str, V] | Iterable[tuple[str, V]] ) -> None: value = convert_to_dict(value) - _ = self.cls(obj, axis=self.axis, store=value) # Validate + _ = self.construct(obj, value) # Validate if obj.is_view: obj._init_as_actual(obj.copy()) setattr(obj, "_" + self.name, value) diff --git a/src/anndata/_core/anndata.py b/src/anndata/_core/anndata.py index e058e0cf5..d52f4224d 100644 --- a/src/anndata/_core/anndata.py +++ b/src/anndata/_core/anndata.py @@ -623,7 +623,7 @@ def X(self, value: np.ndarray | sparse.spmatrix | SpArray | None): def X(self): self.X = None - layers = AlignedMappingProperty("layers", Layers, (0, 1)) + layers = AlignedMappingProperty("layers", Layers) """\ Dictionary-like object with values of the same dimensions as :attr:`X`. From 24b89a08ca2ff50640bd9e63ffbdef4cdf26cb78 Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Thu, 4 Jul 2024 12:13:53 +0200 Subject: [PATCH 26/33] more fmt --- src/anndata/_core/aligned_mapping.py | 20 +++++++------------- src/anndata/_core/anndata.py | 4 ++-- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/src/anndata/_core/aligned_mapping.py b/src/anndata/_core/aligned_mapping.py index dacc8051d..fd5cb7277 100644 --- a/src/anndata/_core/aligned_mapping.py +++ b/src/anndata/_core/aligned_mapping.py @@ -4,14 +4,7 @@ from abc import ABC, abstractmethod from collections.abc import Callable, Collection, MutableMapping from copy import copy -from typing import ( - TYPE_CHECKING, - ClassVar, - Generic, - Literal, - TypeVar, - Union, -) +from typing import TYPE_CHECKING, Generic, TypeVar, Union import pandas as pd @@ -25,6 +18,7 @@ if TYPE_CHECKING: from collections.abc import Iterable, Iterator, Mapping, Sequence + from typing import ClassVar, Literal, Self import numpy as np from scipy.sparse import spmatrix @@ -111,7 +105,7 @@ def is_view(self) -> bool: ... def parent(self) -> AnnData | Raw: return self._parent - def copy(self): + def copy(self) -> Self: d = self._actual_class(self.parent, axis=self._axis, store={}) for k, v in self.items(): if isinstance(v, AwkArray): @@ -415,7 +409,7 @@ def __init__( self.axis = axis self.cls = cls - def construct(self, obj: AnnData, store: MutableMapping[str, V]) -> T: + def construct(self, obj: AnnData, *, store: MutableMapping[str, V]) -> T: if self.axis is None: return self.cls(obj, store=store) return self.cls(obj, axis=self.axis, store=store) @@ -435,7 +429,7 @@ def __get__(self, obj: None | AnnData, objtype: type | None = None) -> T: if obj is None: # needs to return a `property`, e.g. for Sphinx return self # type: ignore if not obj.is_view: - return self.construct(obj, getattr(obj, f"_{self.name}")) + return self.construct(obj, store=getattr(obj, f"_{self.name}")) parent_anndata = obj._adata_ref idxs = (obj._oidx, obj._vidx) parent_aligned_mapping: AlignedMapping = getattr(parent_anndata, self.name) @@ -447,10 +441,10 @@ def __set__( self, obj: AnnData, value: Mapping[str, V] | Iterable[tuple[str, V]] ) -> None: value = convert_to_dict(value) - _ = self.construct(obj, value) # Validate + _ = self.construct(obj, store=value) # Validate if obj.is_view: obj._init_as_actual(obj.copy()) - setattr(obj, "_" + self.name, value) + setattr(obj, f"_{self.name}", value) def __delete__(self, obj) -> None: setattr(obj, self.name, dict()) diff --git a/src/anndata/_core/anndata.py b/src/anndata/_core/anndata.py index d52f4224d..0c3253707 100644 --- a/src/anndata/_core/anndata.py +++ b/src/anndata/_core/anndata.py @@ -1343,8 +1343,8 @@ def to_memory(self, copy=False) -> AnnData: Params ------ - copy: - Whether the arrays that are already in-memory should be copied. + copy + Whether the arrays that are already in-memory should be copied. Example ------- From 05ebee9b49bf1f8d58c73db5479344d9b94aac74 Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Thu, 4 Jul 2024 13:07:39 +0200 Subject: [PATCH 27/33] dedupe and test --- src/anndata/_core/aligned_mapping.py | 52 ++++++++++++++-------------- tests/test_base.py | 3 ++ tests/test_layers.py | 8 +++-- 3 files changed, 35 insertions(+), 28 deletions(-) diff --git a/src/anndata/_core/aligned_mapping.py b/src/anndata/_core/aligned_mapping.py index fd5cb7277..e4228b81a 100644 --- a/src/anndata/_core/aligned_mapping.py +++ b/src/anndata/_core/aligned_mapping.py @@ -43,12 +43,15 @@ class AlignedMapping(MutableMapping, ABC): _allow_df: ClassVar[bool] """If this mapping supports heterogeneous DataFrames""" - _view_class: ClassVar[type[AlignedViewMixin]] + _view_class: ClassVar[type[AlignedView]] """The view class for this aligned mapping.""" - _actual_class: ClassVar[type[AlignedActualMixin]] + _actual_class: ClassVar[type[AlignedActual]] """The actual class (which has it’s own data) for this aligned mapping.""" + _parent: AnnData | Raw + """The parent object that this mapping is aligned to.""" + def __repr__(self): return f"{type(self).__name__} with keys: {', '.join(self.keys())}" @@ -124,7 +127,8 @@ def as_dict(self) -> dict: return dict(self) -class AlignedViewMixin: +class AlignedView(AlignedMapping): + # override docstring parent: AnnData """Reference to parent AnnData view""" @@ -142,7 +146,7 @@ def __getitem__(self, key: str) -> V: ElementRef(self.parent, self.attrname, (key,)), ) - def __setitem__(self, key: str, value: V): + def __setitem__(self, key: str, value: V) -> None: value = self._validate_value(value, key) # Validate before mutating warnings.warn( f"Setting element `.{self.attrname}['{key}']` of view, " @@ -153,7 +157,7 @@ def __setitem__(self, key: str, value: V): with view_update(self.parent, self.attrname, ()) as new_mapping: new_mapping[key] = value - def __delitem__(self, key: str): + def __delitem__(self, key: str) -> None: if key not in self: raise KeyError( "'{key!r}' not found in view of {self.attrname}" @@ -177,12 +181,18 @@ def __len__(self) -> int: return len(self.parent_mapping) -class AlignedActualMixin: +class AlignedActual(AlignedMapping): _data: MutableMapping[str, V] """Underlying mapping to the data""" is_view = False + def __init__(self, parent: AnnData | Raw, *, store: MutableMapping[str, V]): + self._parent = parent + self._data = store + for k, v in self._data.items(): + self._data[k] = self._validate_value(v, k) + def __getitem__(self, key: str) -> V: return self._data[key] @@ -259,7 +269,7 @@ def dim_names(self) -> pd.Index: return (self.parent.obs_names, self.parent.var_names)[self._axis] -class AxisArrays(AlignedActualMixin, AxisArraysBase): +class AxisArrays(AlignedActual, AxisArraysBase): def __init__( self, parent: AnnData | Raw, @@ -267,16 +277,13 @@ def __init__( axis: Literal[0, 1], store: MutableMapping[str, V] | AxisArraysBase, ): - self._parent = parent if axis not in {0, 1}: raise ValueError() self._axis = axis - self._data = store - for k, v in self._data.items(): - self._data[k] = self._validate_value(v, k) + super().__init__(parent, store=store) -class AxisArraysView(AlignedViewMixin, AxisArraysBase): +class AxisArraysView(AlignedView, AxisArraysBase): def __init__( self, parent_mapping: AxisArraysBase, @@ -311,15 +318,11 @@ def copy(self) -> Layers: return d -class Layers(AlignedActualMixin, LayersBase): - def __init__(self, parent: AnnData, *, store: MutableMapping[str, V]): - self._parent = parent - self._data = store - for k, v in self._data.items(): - self._data[k] = self._validate_value(v, k) +class Layers(AlignedActual, LayersBase): + pass -class LayersView(AlignedViewMixin, LayersBase): +class LayersView(AlignedView, LayersBase): def __init__( self, parent_mapping: LayersBase, @@ -361,7 +364,7 @@ def dim(self) -> str: return self._dimnames[self._axis] -class PairwiseArrays(AlignedActualMixin, PairwiseArraysBase): +class PairwiseArrays(AlignedActual, PairwiseArraysBase): def __init__( self, parent: AnnData, @@ -369,16 +372,13 @@ def __init__( axis: Literal[0, 1], store: MutableMapping[str, V], ): - self._parent = parent if axis not in {0, 1}: raise ValueError() self._axis = axis - self._data = store - for k, v in self._data.items(): - self._data[k] = self._validate_value(v, k) + super().__init__(parent, store=store) -class PairwiseArraysView(AlignedViewMixin, PairwiseArraysBase): +class PairwiseArraysView(AlignedView, PairwiseArraysBase): def __init__( self, parent_mapping: PairwiseArraysBase, @@ -415,7 +415,7 @@ def construct(self, obj: AnnData, *, store: MutableMapping[str, V]) -> T: return self.cls(obj, axis=self.axis, store=store) @property - def fget(self) -> Callable: + def fget(self) -> Callable[[], None]: """Fake fget for sphinx-autodoc-typehints.""" def fake(): ... diff --git a/tests/test_base.py b/tests/test_base.py index 53d315a52..02e4eabe1 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -703,6 +703,9 @@ def assert_eq_not_id(a, b): map_sprs = getattr(adata_sparse, attr) map_copy = getattr(adata_copy, attr) assert map_sprs is not map_copy + if attr not in {"obs", "var"}: + # check that we don’t create too many references + assert getattr(adata_copy, f"_{attr}") is map_copy._data assert_eq_not_id(map_sprs.keys(), map_copy.keys()) for key in map_sprs.keys(): assert_eq_not_id(map_sprs[key], map_copy[key]) diff --git a/tests/test_layers.py b/tests/test_layers.py index f2a92eb73..ba1f96e49 100644 --- a/tests/test_layers.py +++ b/tests/test_layers.py @@ -8,7 +8,7 @@ import pytest from numba.core.errors import NumbaDeprecationWarning -from anndata import AnnData, read_h5ad, read_loom +from anndata import AnnData, ImplicitModificationWarning, read_h5ad, read_loom from anndata.tests.helpers import gen_typed_df_t2_size from testing.anndata._helpers import xfail_if_numpy2_loompy @@ -41,7 +41,8 @@ def test_views(): assert adata_view.layers.keys() == adata.layers.keys() assert (adata_view.layers["S"] == adata.layers["S"][1:, 1:]).all() - adata_view.layers["T"] = X[1:, 1:] + with pytest.warns(ImplicitModificationWarning): + adata_view.layers["T"] = X[1:, 1:] assert not adata_view.layers.is_view assert not adata_view.is_view @@ -106,6 +107,9 @@ def test_backed(): def test_copy(): adata = AnnData(X=X, layers=dict(L=L.copy())) bdata = adata.copy() + # check that we don’t create too many references + assert bdata._layers is bdata.layers._data + # check that we have a copy adata.layers["L"] += 10 assert np.all(adata.layers["L"] != bdata.layers["L"]) # 201 From 866abbdac60abbd1b636346f5ff7740506c6c031 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Thu, 4 Jul 2024 13:59:04 +0200 Subject: [PATCH 28/33] Simplify copy --- src/anndata/_core/aligned_mapping.py | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/src/anndata/_core/aligned_mapping.py b/src/anndata/_core/aligned_mapping.py index e4228b81a..becdaf89e 100644 --- a/src/anndata/_core/aligned_mapping.py +++ b/src/anndata/_core/aligned_mapping.py @@ -18,7 +18,7 @@ if TYPE_CHECKING: from collections.abc import Iterable, Iterator, Mapping, Sequence - from typing import ClassVar, Literal, Self + from typing import ClassVar, Literal import numpy as np from scipy.sparse import spmatrix @@ -108,15 +108,11 @@ def is_view(self) -> bool: ... def parent(self) -> AnnData | Raw: return self._parent - def copy(self) -> Self: - d = self._actual_class(self.parent, axis=self._axis, store={}) - for k, v in self.items(): - if isinstance(v, AwkArray): - # Shallow copy since awkward array buffers are immutable - d[k] = copy(v) - else: - d[k] = v.copy() - return d + def copy(self) -> dict[str, V]: + # Shallow copy for awkward array since their buffers are immutable + return { + k: copy(v) if isinstance(v, AwkArray) else v.copy() for k, v in self.items() + } def _view(self, parent: AnnData, subset_idx: I): """Returns a subset copy-on-write view of the object.""" @@ -310,13 +306,6 @@ class LayersBase(AlignedMapping): attrname = "layers" axes = (0, 1) - # TODO: I thought I had a more elegant solution to overriding this... - def copy(self) -> Layers: - d = self._actual_class(self.parent, store={}) - for k, v in self.items(): - d[k] = v.copy() - return d - class Layers(AlignedActual, LayersBase): pass From 015d6ad11b5229484ea58f373eaff5bb5d76bece Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Mon, 22 Jul 2024 13:31:12 +0200 Subject: [PATCH 29/33] docs and typing --- src/anndata/_core/aligned_mapping.py | 179 +++++++++++++-------------- src/anndata/_core/anndata.py | 24 +++- src/anndata/_core/raw.py | 29 +++-- src/anndata/tests/helpers.py | 4 +- 4 files changed, 119 insertions(+), 117 deletions(-) diff --git a/src/anndata/_core/aligned_mapping.py b/src/anndata/_core/aligned_mapping.py index 166ad95f1..d6a285601 100644 --- a/src/anndata/_core/aligned_mapping.py +++ b/src/anndata/_core/aligned_mapping.py @@ -2,11 +2,14 @@ import warnings from abc import ABC, abstractmethod -from collections.abc import Collection, MutableMapping, Sequence +from collections.abc import MutableMapping, Sequence from copy import copy +from dataclasses import dataclass from typing import TYPE_CHECKING, Generic, TypeVar, Union +import numpy as np import pandas as pd +from scipy.sparse import spmatrix from .._warnings import ExperimentalFeatureWarning, ImplicitModificationWarning from ..compat import AwkArray @@ -17,24 +20,24 @@ from .views import as_view, view_update if TYPE_CHECKING: - from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence - from typing import ClassVar, Literal - - import numpy as np - from scipy.sparse import spmatrix + from collections.abc import Callable, Iterable, Iterator, Mapping + from typing import ClassVar, Literal, Self from .anndata import AnnData from .raw import Raw - OneDIdx = Union[Sequence[int], Sequence[bool], slice] - TwoDIdx = tuple[OneDIdx, OneDIdx] - I = TypeVar("I", OneDIdx, TwoDIdx, covariant=True) - # TODO: pd.DataFrame only allowed in AxisArrays? - V = Union[pd.DataFrame, spmatrix, np.ndarray] +OneDIdx = Union[Sequence[int], Sequence[bool], slice] +TwoDIdx = tuple[OneDIdx, OneDIdx] +# TODO: pd.DataFrame only allowed in AxisArrays? +Value = Union[pd.DataFrame, spmatrix, np.ndarray] + +P = TypeVar("P", bound="AlignedMappingBase") +"""Parent mapping an AlignedView is based on.""" +I = TypeVar("I", OneDIdx, TwoDIdx, covariant=True) -class AlignedMapping(MutableMapping, ABC): +class AlignedMappingBase(MutableMapping[str, Value], ABC, Generic[I]): """\ An abstract base class for Mappings containing array-like values aligned to either one or both AnnData axes. @@ -58,7 +61,7 @@ def __repr__(self): def _ipython_key_completions_(self) -> list[str]: return list(self.keys()) - def _validate_value(self, val: V, key: str) -> V: + def _validate_value(self, val: Value, key: str) -> Value: """Raises an error if value is invalid""" if isinstance(val, AwkArray): warn_once( @@ -108,13 +111,13 @@ def is_view(self) -> bool: ... def parent(self) -> AnnData | Raw: return self._parent - def copy(self) -> dict[str, V]: + def copy(self) -> dict[str, Value]: # Shallow copy for awkward array since their buffers are immutable return { k: copy(v) if isinstance(v, AwkArray) else v.copy() for k, v in self.items() } - def _view(self, parent: AnnData, subset_idx: I): + def _view(self, parent: AnnData, subset_idx: I) -> AlignedView[Self, I]: """Returns a subset copy-on-write view of the object.""" return self._view_class(self, parent, subset_idx) @@ -123,7 +126,9 @@ def as_dict(self) -> dict: return dict(self) -class AlignedView(AlignedMapping): +class AlignedView(AlignedMappingBase[I], Generic[P, I]): + is_view: ClassVar[Literal[True]] = True + # override docstring parent: AnnData """Reference to parent AnnData view""" @@ -131,18 +136,27 @@ class AlignedView(AlignedMapping): attrname: str """What attribute in the parent is this?""" - parent_mapping: Mapping[str, V] + parent_mapping: Mapping[str, Value] """The object this is a view of.""" - is_view = True + subset_idx: I + """The subset of the parent to view.""" + + def __init__(self, parent_mapping: P, parent_view: AnnData, subset_idx: I): + self.parent_mapping = parent_mapping + self._parent = parent_view + self.subset_idx = subset_idx + if hasattr(parent_mapping, "_axis"): + # LayersBase has no _axis, the rest does + self._axis = parent_mapping._axis # type: ignore - def __getitem__(self, key: str) -> V: + def __getitem__(self, key: str) -> Value: return as_view( _subset(self.parent_mapping[key], self.subset_idx), ElementRef(self.parent, self.attrname, (key,)), ) - def __setitem__(self, key: str, value: V) -> None: + def __setitem__(self, key: str, value: Value) -> None: value = self._validate_value(value, key) # Validate before mutating warnings.warn( f"Setting element `.{self.attrname}['{key}']` of view, " @@ -177,22 +191,22 @@ def __len__(self) -> int: return len(self.parent_mapping) -class AlignedActual(AlignedMapping): - _data: MutableMapping[str, V] - """Underlying mapping to the data""" +class AlignedActual(AlignedMappingBase): + is_view: ClassVar[Literal[False]] = False - is_view = False + _data: MutableMapping[str, Value] + """Underlying mapping to the data""" - def __init__(self, parent: AnnData | Raw, *, store: MutableMapping[str, V]): + def __init__(self, parent: AnnData | Raw, *, store: MutableMapping[str, Value]): self._parent = parent self._data = store for k, v in self._data.items(): self._data[k] = self._validate_value(v, k) - def __getitem__(self, key: str) -> V: + def __getitem__(self, key: str) -> Value: return self._data[key] - def __setitem__(self, key: str, value: V): + def __setitem__(self, key: str, value: Value): value = self._validate_value(value, key) self._data[key] = value @@ -209,14 +223,14 @@ def __len__(self) -> int: return len(self._data) -class AxisArraysBase(AlignedMapping): +class AxisArraysBase(AlignedMappingBase): """\ Mapping of key→array-like, where array-like is aligned to an axis of parent AnnData. """ - _allow_df = True - _dimnames = ("obs", "var") + _allow_df: ClassVar = True + _dimnames: ClassVar = ("obs", "var") _axis: Literal[0, 1] @@ -243,12 +257,8 @@ def to_df(self) -> pd.DataFrame: df[f"{key}{icolumn + 1}"] = column return df - def _validate_value(self, val: V, key: str) -> V: - if ( - hasattr(val, "index") - and isinstance(val.index, Collection) - and not val.index.equals(self.dim_names) - ): + def _validate_value(self, val: Value, key: str) -> Value: + if isinstance(val, pd.DataFrame) and not val.index.equals(self.dim_names): # Could probably also re-order index if it’s contained try: pd.testing.assert_index_equal(val.index, self.dim_names) @@ -271,7 +281,7 @@ def __init__( parent: AnnData | Raw, *, axis: Literal[0, 1], - store: MutableMapping[str, V] | AxisArraysBase, + store: MutableMapping[str, Value] | AxisArraysBase, ): if axis not in {0, 1}: raise ValueError() @@ -279,62 +289,45 @@ def __init__( super().__init__(parent, store=store) -class AxisArraysView(AlignedView, AxisArraysBase): - def __init__( - self, - parent_mapping: AxisArraysBase, - parent_view: AnnData, - subset_idx: OneDIdx, - ): - self.parent_mapping = parent_mapping - self._parent = parent_view - self.subset_idx = subset_idx - self._axis = parent_mapping._axis +class AxisArraysView(AlignedView[AxisArraysBase, OneDIdx], AxisArraysBase): + pass AxisArraysBase._view_class = AxisArraysView AxisArraysBase._actual_class = AxisArrays -class LayersBase(AlignedMapping): +class LayersBase(AlignedMappingBase): """\ Mapping of key: array-like, where array-like is aligned to both axes of the parent anndata. """ - _allow_df = False - attrname = "layers" - axes = (0, 1) + _allow_df: ClassVar = False + attrname: ClassVar[Literal["layers"]] = "layers" + axes: ClassVar[tuple[Literal[0], Literal[1]]] = (0, 1) class Layers(AlignedActual, LayersBase): pass -class LayersView(AlignedView, LayersBase): - def __init__( - self, - parent_mapping: LayersBase, - parent_view: AnnData, - subset_idx: TwoDIdx, - ): - self.parent_mapping = parent_mapping - self._parent = parent_view - self.subset_idx = subset_idx +class LayersView(AlignedView[LayersBase, TwoDIdx], LayersBase): + pass LayersBase._view_class = LayersView LayersBase._actual_class = Layers -class PairwiseArraysBase(AlignedMapping): +class PairwiseArraysBase(AlignedMappingBase): """\ Mapping of key: array-like, where both axes of array-like are aligned to one axis of the parent anndata. """ - _allow_df = False - _dimnames = ("obs", "var") + _allow_df: ClassVar = False + _dimnames: ClassVar = ("obs", "var") _axis: Literal[0, 1] @@ -345,7 +338,7 @@ def attrname(self) -> str: @property def axes(self) -> tuple[Literal[0], Literal[0]] | tuple[Literal[1], Literal[1]]: """Axes of the parent this is aligned to""" - return self._axis, self._axis + return self._axis, self._axis # type: ignore @property def dim(self) -> str: @@ -359,7 +352,7 @@ def __init__( parent: AnnData, *, axis: Literal[0, 1], - store: MutableMapping[str, V], + store: MutableMapping[str, Value], ): if axis not in {0, 1}: raise ValueError() @@ -367,38 +360,36 @@ def __init__( super().__init__(parent, store=store) -class PairwiseArraysView(AlignedView, PairwiseArraysBase): - def __init__( - self, - parent_mapping: PairwiseArraysBase, - parent_view: AnnData, - subset_idx: OneDIdx, - ): - self.parent_mapping = parent_mapping - self._parent = parent_view - self.subset_idx = subset_idx - self._axis = parent_mapping._axis +class PairwiseArraysView(AlignedView[PairwiseArraysBase, OneDIdx], PairwiseArraysBase): + pass PairwiseArraysBase._view_class = PairwiseArraysView PairwiseArraysBase._actual_class = PairwiseArrays +AlignedMapping = Union[ + AxisArrays, AxisArraysView, Layers, LayersView, PairwiseArrays, PairwiseArraysView +] T = TypeVar("T", bound=AlignedMapping) +"""Pair of types to be aligned.""" +@dataclass class AlignedMappingProperty(property, Generic[T]): - def __init__( - self, - name: str, - cls: type[T], - axis: Literal[0, 1] | None = None, - ): - self.name = name - self.axis = axis - self.cls = cls + """A :class:`property` that creates an ephemeral AlignedMapping. - def construct(self, obj: AnnData, *, store: MutableMapping[str, V]) -> T: + The actual data is stored as `f'_{self.name}'` in the parent object. + """ + + name: str + """Name of the attribute in the parent object.""" + cls: type[T] + """Concrete type that will be constructed.""" + axis: Literal[0, 1] | None = None + """Axis of the parent to align to.""" + + def construct(self, obj: AnnData, *, store: MutableMapping[str, Value]) -> T: if self.axis is None: return self.cls(obj, store=store) return self.cls(obj, axis=self.axis, store=store) @@ -415,19 +406,19 @@ def fake(): ... return fake def __get__(self, obj: None | AnnData, objtype: type | None = None) -> T: - if obj is None: # needs to return a `property`, e.g. for Sphinx + if obj is None: + # When accessed from the class, e.g. via `AnnData.obs`, + # this needs to return a `property` instance, e.g. for Sphinx return self # type: ignore if not obj.is_view: return self.construct(obj, store=getattr(obj, f"_{self.name}")) parent_anndata = obj._adata_ref idxs = (obj._oidx, obj._vidx) - parent_aligned_mapping: AlignedMapping = getattr(parent_anndata, self.name) - return parent_aligned_mapping._view( - obj, tuple(idxs[ax] for ax in parent_aligned_mapping.axes) - ) + parent: AlignedMapping = getattr(parent_anndata, self.name) + return parent._view(obj, tuple(idxs[ax] for ax in parent.axes)) def __set__( - self, obj: AnnData, value: Mapping[str, V] | Iterable[tuple[str, V]] + self, obj: AnnData, value: Mapping[str, Value] | Iterable[tuple[str, Value]] ) -> None: value = convert_to_dict(value) _ = self.construct(obj, store=value) # Validate diff --git a/src/anndata/_core/anndata.py b/src/anndata/_core/anndata.py index 1af2d517e..a285c8ba9 100644 --- a/src/anndata/_core/anndata.py +++ b/src/anndata/_core/anndata.py @@ -29,7 +29,15 @@ from ..utils import axis_len, deprecated, ensure_df_homogeneous from .access import ElementRef from .aligned_df import _gen_dataframe -from .aligned_mapping import AlignedMappingProperty, AxisArrays, Layers, PairwiseArrays +from .aligned_mapping import ( + AlignedMappingProperty, + AxisArrays, + AxisArraysView, + Layers, + LayersView, + PairwiseArrays, + PairwiseArraysView, +) from .file_backing import AnnDataFileManager, to_memory from .index import _normalize_indices, _subset, get_vector from .raw import Raw @@ -649,7 +657,7 @@ def X(self, value: np.ndarray | sparse.spmatrix | SpArray | None): def X(self): self.X = None - layers = AlignedMappingProperty("layers", Layers) + layers = AlignedMappingProperty[Layers | LayersView]("layers", Layers) """\ Dictionary-like object with values of the same dimensions as :attr:`X`. @@ -857,7 +865,7 @@ def uns(self, value: MutableMapping): def uns(self): self.uns = OrderedDict() - obsm = AlignedMappingProperty("obsm", AxisArrays, 0) + obsm = AlignedMappingProperty[AxisArrays | AxisArraysView]("obsm", AxisArrays, 0) """\ Multi-dimensional annotation of observations (mutable structured :class:`~numpy.ndarray`). @@ -867,7 +875,7 @@ def uns(self): Is sliced with `data` and `obs` but behaves otherwise like a :term:`mapping`. """ - varm = AlignedMappingProperty("varm", AxisArrays, 1) + varm = AlignedMappingProperty[AxisArrays | AxisArraysView]("varm", AxisArrays, 1) """\ Multi-dimensional annotation of variables/features (mutable structured :class:`~numpy.ndarray`). @@ -877,7 +885,9 @@ def uns(self): Is sliced with `data` and `var` but behaves otherwise like a :term:`mapping`. """ - obsp = AlignedMappingProperty("obsp", PairwiseArrays, 0) + obsp = AlignedMappingProperty[PairwiseArrays | PairwiseArraysView]( + "obsp", PairwiseArrays, 0 + ) """\ Pairwise annotation of observations, a mutable mapping with array-like values. @@ -887,7 +897,9 @@ def uns(self): Is sliced with `data` and `obs` but behaves otherwise like a :term:`mapping`. """ - varp = AlignedMappingProperty("varp", PairwiseArrays, 1) + varp = AlignedMappingProperty[PairwiseArrays | PairwiseArraysView]( + "varp", PairwiseArrays, 1 + ) """\ Pairwise annotation of variables/features, a mutable mapping with array-like values. diff --git a/src/anndata/_core/raw.py b/src/anndata/_core/raw.py index 2bb2dc242..d49305755 100644 --- a/src/anndata/_core/raw.py +++ b/src/anndata/_core/raw.py @@ -9,12 +9,13 @@ from ..compat import CupyArray, CupySparseMatrix from .aligned_df import _gen_dataframe -from .aligned_mapping import AlignedMappingProperty, AxisArrays +from .aligned_mapping import AlignedMappingProperty, AxisArrays, AxisArraysView from .index import _normalize_index, _subset, get_vector, unpack_index from .sparse_dataset import sparse_dataset if TYPE_CHECKING: from collections.abc import Mapping, Sequence + from typing import ClassVar from scipy import sparse @@ -24,6 +25,8 @@ # TODO: Implement views for Raw class Raw: + is_view: ClassVar = False + def __init__( self, adata: AnnData, @@ -89,29 +92,29 @@ def X(self) -> BaseCompressedSparseDataset | np.ndarray | sparse.spmatrix: return X @property - def shape(self): + def shape(self) -> tuple[int, int]: return self.n_obs, self.n_vars @property - def var(self): + def var(self) -> pd.DataFrame: return self._var @property - def n_vars(self): + def n_vars(self) -> int: return self._var.shape[0] @property - def n_obs(self): + def n_obs(self) -> int: return self._n_obs - varm = AlignedMappingProperty("varm", AxisArrays, 1) + varm = AlignedMappingProperty[AxisArrays | AxisArraysView]("varm", AxisArrays, 1) @property - def var_names(self): + def var_names(self) -> pd.Index[str]: return self.var.index @property - def obs_names(self): + def obs_names(self) -> pd.Index[str]: return self._adata.obs_names def __getitem__(self, index): @@ -135,11 +138,7 @@ def __getitem__(self, index): new.varm = self.varm._view(_RawViewHack(self, vidx), (vidx,)).copy() return new - @property - def is_view(self): - return False - - def __str__(self): + def __str__(self) -> str: descr = f"Raw AnnData with n_obs × n_vars = {self.n_obs} × {self.n_vars}" for attr in ["var", "varm"]: keys = getattr(self, attr).keys() @@ -147,7 +146,7 @@ def __str__(self): descr += f"\n {attr}: {str(list(keys))[1:-1]}" return descr - def copy(self): + def copy(self) -> Raw: return Raw( self._adata, X=self.X.copy(), @@ -155,7 +154,7 @@ def copy(self): varm=None if self._varm is None else self._varm.copy(), ) - def to_adata(self): + def to_adata(self) -> AnnData: """Create full AnnData object.""" from anndata import AnnData diff --git a/src/anndata/tests/helpers.py b/src/anndata/tests/helpers.py index b7c2d06bc..2e5521316 100644 --- a/src/anndata/tests/helpers.py +++ b/src/anndata/tests/helpers.py @@ -17,7 +17,7 @@ from scipy import sparse from anndata import AnnData, ExperimentalFeatureWarning, Raw -from anndata._core.aligned_mapping import AlignedMapping +from anndata._core.aligned_mapping import AlignedMappingBase from anndata._core.sparse_dataset import BaseCompressedSparseDataset from anndata._core.views import ArrayView from anndata.compat import ( @@ -585,7 +585,7 @@ def assert_equal_mapping(a, b, exact=False, elem_name=None): assert_equal(a[k], b[k], exact, f"{elem_name}/{k}") -@assert_equal.register(AlignedMapping) +@assert_equal.register(AlignedMappingBase) def assert_equal_aligned_mapping(a, b, exact=False, elem_name=None): a_indices = (a.parent.obs_names, a.parent.var_names) b_indices = (b.parent.obs_names, b.parent.var_names) From 3a1a007b88d4169b34a1b26a5ed1f889a2ce03b9 Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Mon, 22 Jul 2024 13:34:07 +0200 Subject: [PATCH 30/33] fix parent_mapping type --- src/anndata/_core/aligned_mapping.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/anndata/_core/aligned_mapping.py b/src/anndata/_core/aligned_mapping.py index d6a285601..e654c09ec 100644 --- a/src/anndata/_core/aligned_mapping.py +++ b/src/anndata/_core/aligned_mapping.py @@ -136,7 +136,7 @@ class AlignedView(AlignedMappingBase[I], Generic[P, I]): attrname: str """What attribute in the parent is this?""" - parent_mapping: Mapping[str, Value] + parent_mapping: P """The object this is a view of.""" subset_idx: I From 73fc94c7bfc234c177600344bd7c7c7339c98a17 Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Mon, 22 Jul 2024 13:38:17 +0200 Subject: [PATCH 31/33] Fix I --- src/anndata/_core/aligned_mapping.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/anndata/_core/aligned_mapping.py b/src/anndata/_core/aligned_mapping.py index e654c09ec..26c89d9f2 100644 --- a/src/anndata/_core/aligned_mapping.py +++ b/src/anndata/_core/aligned_mapping.py @@ -34,10 +34,10 @@ P = TypeVar("P", bound="AlignedMappingBase") """Parent mapping an AlignedView is based on.""" -I = TypeVar("I", OneDIdx, TwoDIdx, covariant=True) +I = TypeVar("I", OneDIdx, TwoDIdx) -class AlignedMappingBase(MutableMapping[str, Value], ABC, Generic[I]): +class AlignedMappingBase(MutableMapping[str, Value], ABC): """\ An abstract base class for Mappings containing array-like values aligned to either one or both AnnData axes. @@ -126,7 +126,7 @@ def as_dict(self) -> dict: return dict(self) -class AlignedView(AlignedMappingBase[I], Generic[P, I]): +class AlignedView(AlignedMappingBase, Generic[P, I]): is_view: ClassVar[Literal[True]] = True # override docstring From ba61e2bd17182c37b68ebd99872608a2b8b65e22 Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Mon, 22 Jul 2024 13:48:52 +0200 Subject: [PATCH 32/33] fix docs --- docs/_templates/autosummary/class.rst | 4 ++-- docs/conf.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/_templates/autosummary/class.rst b/docs/_templates/autosummary/class.rst index b4e7370aa..8fe1d69d0 100644 --- a/docs/_templates/autosummary/class.rst +++ b/docs/_templates/autosummary/class.rst @@ -13,7 +13,7 @@ .. autosummary:: :toctree: . {% for item in attributes %} - ~{{ fullname }}.{{ item }} + ~{{ name }}.{{ item }} {%- endfor %} {% endif %} {% endblock %} @@ -26,7 +26,7 @@ :toctree: . {% for item in methods %} {%- if item != '__init__' %} - ~{{ fullname }}.{{ item }} + ~{{ name }}.{{ item }} {%- endif -%} {%- endfor %} {% endif %} diff --git a/docs/conf.py b/docs/conf.py index ec253fc68..de28bc6c5 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -27,7 +27,7 @@ # default settings templates_path = ["_templates"] html_static_path = ["_static"] -source_suffix = [".rst", ".md"] +source_suffix = {".rst": "restructuredtext", ".md": "markdown"} master_doc = "index" default_role = "literal" exclude_patterns = [ From e138fc99fa4d5ab7e9351a45b905e13a4fc2415f Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Mon, 22 Jul 2024 13:53:14 +0200 Subject: [PATCH 33/33] fix 3.9 --- src/anndata/_core/anndata.py | 31 +++++++++++++++---------------- src/anndata/_core/raw.py | 7 +++++-- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/src/anndata/_core/anndata.py b/src/anndata/_core/anndata.py index a285c8ba9..7da29e4a4 100644 --- a/src/anndata/_core/anndata.py +++ b/src/anndata/_core/anndata.py @@ -29,15 +29,7 @@ from ..utils import axis_len, deprecated, ensure_df_homogeneous from .access import ElementRef from .aligned_df import _gen_dataframe -from .aligned_mapping import ( - AlignedMappingProperty, - AxisArrays, - AxisArraysView, - Layers, - LayersView, - PairwiseArrays, - PairwiseArraysView, -) +from .aligned_mapping import AlignedMappingProperty, AxisArrays, Layers, PairwiseArrays from .file_backing import AnnDataFileManager, to_memory from .index import _normalize_indices, _subset, get_vector from .raw import Raw @@ -55,6 +47,7 @@ from os import PathLike from typing import Any, Literal + from .aligned_mapping import AxisArraysView, LayersView, PairwiseArraysView from .index import Index, Index1D from .views import ArrayView @@ -657,7 +650,9 @@ def X(self, value: np.ndarray | sparse.spmatrix | SpArray | None): def X(self): self.X = None - layers = AlignedMappingProperty[Layers | LayersView]("layers", Layers) + layers: AlignedMappingProperty[Layers | LayersView] = AlignedMappingProperty( + "layers", Layers + ) """\ Dictionary-like object with values of the same dimensions as :attr:`X`. @@ -865,7 +860,9 @@ def uns(self, value: MutableMapping): def uns(self): self.uns = OrderedDict() - obsm = AlignedMappingProperty[AxisArrays | AxisArraysView]("obsm", AxisArrays, 0) + obsm: AlignedMappingProperty[AxisArrays | AxisArraysView] = AlignedMappingProperty( + "obsm", AxisArrays, 0 + ) """\ Multi-dimensional annotation of observations (mutable structured :class:`~numpy.ndarray`). @@ -875,7 +872,9 @@ def uns(self): Is sliced with `data` and `obs` but behaves otherwise like a :term:`mapping`. """ - varm = AlignedMappingProperty[AxisArrays | AxisArraysView]("varm", AxisArrays, 1) + varm: AlignedMappingProperty[AxisArrays | AxisArraysView] = AlignedMappingProperty( + "varm", AxisArrays, 1 + ) """\ Multi-dimensional annotation of variables/features (mutable structured :class:`~numpy.ndarray`). @@ -885,8 +884,8 @@ def uns(self): Is sliced with `data` and `var` but behaves otherwise like a :term:`mapping`. """ - obsp = AlignedMappingProperty[PairwiseArrays | PairwiseArraysView]( - "obsp", PairwiseArrays, 0 + obsp: AlignedMappingProperty[PairwiseArrays | PairwiseArraysView] = ( + AlignedMappingProperty("obsp", PairwiseArrays, 0) ) """\ Pairwise annotation of observations, @@ -897,8 +896,8 @@ def uns(self): Is sliced with `data` and `obs` but behaves otherwise like a :term:`mapping`. """ - varp = AlignedMappingProperty[PairwiseArrays | PairwiseArraysView]( - "varp", PairwiseArrays, 1 + varp: AlignedMappingProperty[PairwiseArrays | PairwiseArraysView] = ( + AlignedMappingProperty("varp", PairwiseArrays, 1) ) """\ Pairwise annotation of variables/features, diff --git a/src/anndata/_core/raw.py b/src/anndata/_core/raw.py index d49305755..7237c06b4 100644 --- a/src/anndata/_core/raw.py +++ b/src/anndata/_core/raw.py @@ -9,7 +9,7 @@ from ..compat import CupyArray, CupySparseMatrix from .aligned_df import _gen_dataframe -from .aligned_mapping import AlignedMappingProperty, AxisArrays, AxisArraysView +from .aligned_mapping import AlignedMappingProperty, AxisArrays from .index import _normalize_index, _subset, get_vector, unpack_index from .sparse_dataset import sparse_dataset @@ -19,6 +19,7 @@ from scipy import sparse + from .aligned_mapping import AxisArraysView from .anndata import AnnData from .sparse_dataset import BaseCompressedSparseDataset @@ -107,7 +108,9 @@ def n_vars(self) -> int: def n_obs(self) -> int: return self._n_obs - varm = AlignedMappingProperty[AxisArrays | AxisArraysView]("varm", AxisArrays, 1) + varm: AlignedMappingProperty[AxisArrays | AxisArraysView] = AlignedMappingProperty( + "varm", AxisArrays, 1 + ) @property def var_names(self) -> pd.Index[str]: