Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(fix): clarify public backed sparse docstring/api #1608

Merged
merged 24 commits into from
Aug 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
a6969b7
(chore): export `read_elem` and `write_elem` from the main package
ilan-gold Aug 14, 2024
1cc2b48
(chore): pr number
ilan-gold Aug 14, 2024
c89238d
(fix): agnostic way of importing
ilan-gold Aug 14, 2024
31c7ad0
(fix): add `RWAble` to `api.md`
ilan-gold Aug 14, 2024
99e0216
(fix): `md` file import
ilan-gold Aug 14, 2024
efe50b2
(fix): clarify public backed sparse docstring/api
ilan-gold Aug 26, 2024
7868c60
Merge branch 'ig/{read,write}_elem_out_of_experimental' into ig/csr_c…
ilan-gold Aug 26, 2024
ba9e0a0
(chore): small fixes
ilan-gold Aug 26, 2024
169297c
(fix): `format` + `to_memory`
ilan-gold Aug 26, 2024
08b741b
Merge branch 'main' into ig/{read,write}_elem_out_of_experimental
ilan-gold Aug 26, 2024
db1a01c
Merge branch 'ig/{read,write}_elem_out_of_experimental' into ig/csr_c…
ilan-gold Aug 26, 2024
f6b5792
(chore): remove deprecation tests + `SparseDataset`
ilan-gold Aug 26, 2024
5fe5887
(chore): clean up private/public api
ilan-gold Aug 26, 2024
0303761
(fix): `test_append_overflow_check` used `indptr`
ilan-gold Aug 26, 2024
ac354e1
Merge branch 'main' into ig/{read,write}_elem_out_of_experimental
ilan-gold Aug 28, 2024
54b27d7
(fix): export `InMemoryElem`
ilan-gold Aug 28, 2024
568620c
(chore): release note
ilan-gold Aug 28, 2024
6786d77
Merge branch 'ig/{read,write}_elem_out_of_experimental' into ig/csr_c…
ilan-gold Aug 28, 2024
9185bea
(chore): move `InMemoryElem` to the "extras" section
ilan-gold Aug 28, 2024
765ba23
Merge branch 'ig/{read,write}_elem_out_of_experimental' into ig/csr_c…
ilan-gold Aug 28, 2024
31c3f53
Merge branch 'main' into ig/csr_csc_public_prep
ilan-gold Aug 30, 2024
77f6be8
Merge branch 'main' into ig/csr_csc_public_prep
ilan-gold Aug 30, 2024
2be51df
Update src/anndata/_core/sparse_dataset.py
flying-sheep Aug 30, 2024
94746c2
(fix): remove dead tests
ilan-gold Aug 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 40 additions & 56 deletions src/anndata/_core/sparse_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@
from collections.abc import Sequence
from typing import Literal

from scipy.sparse import spmatrix

from .._types import GroupStorageType
from .index import Index

Expand Down Expand Up @@ -353,9 +355,8 @@


class BaseCompressedSparseDataset(ABC):
"""Analogous to :class:`h5py.Dataset <h5py:Dataset>` or `zarr.Array`, but for sparse matrices."""

format: Literal["csr", "csc"]
"""The format of the sparse matrix."""
_group: GroupStorageType

def __init__(self, group: GroupStorageType):
Expand All @@ -378,6 +379,7 @@

@property
def backend(self) -> Literal["zarr", "hdf5"]:
"""Which file type is used on-disk."""
if isinstance(self.group, ZarrGroup):
return "zarr"
elif isinstance(self.group, H5Group):
Expand All @@ -387,6 +389,7 @@

@property
def dtype(self) -> np.dtype:
"""The :class:`numpy.dtype` of the `data` attribute of the sparse matrix."""
return self.group["data"].dtype

@classmethod
Expand All @@ -395,37 +398,19 @@
assert group_format == cls.format

@property
def format_str(self) -> Literal["csr", "csc"]:
"""DEPRECATED Use .format instead."""
warnings.warn(
"The attribute .format_str is deprecated and will be removed in the anndata 0.11.0. "
"Please use .format instead.",
FutureWarning,
)
return self.format

@property
def name(self) -> str:
def _name(self) -> str:
"""Name of the group."""
return self.group.name

@property
def shape(self) -> tuple[int, int]:
"""Shape of the matrix read off disk."""
shape = _read_attr(self.group.attrs, "shape", None)
if shape is None:
# TODO warn
shape = self.group.attrs.get("h5sparse_shape")
return tuple(map(int, shape))

@property
def value(self) -> ss.csr_matrix | ss.csc_matrix:
"""DEPRECATED Use .to_memory() instead."""
warnings.warn(
"The .value attribute is deprecated and will be removed in the anndata 0.11.0. "
"Please use .to_memory() instead.",
FutureWarning,
)
return self.to_memory()

def __repr__(self) -> str:
return f"{type(self).__name__}: backend {self.backend}, shape {self.shape}, data_dtype {self.dtype}"

Expand Down Expand Up @@ -483,7 +468,25 @@
mock_matrix[row, col] = value

# TODO: split to other classes?
def append(self, sparse_matrix: _cs_matrix | SpArray) -> None:
def append(self, sparse_matrix: spmatrix | SpArray) -> None:
"""Append an in-memory or on-disk sparse matrix to the current object's store.

Parameters
----------
sparse_matrix
The matrix to append.

Raises
------
NotImplementedError
If the matrix to append is not one of :class:`~scipy.sparse.csr_array`, :class:`~scipy.sparse.csc_array`, :class:`~scipy.sparse.csr_matrix`, or :class:`~scipy.sparse.csc_matrix`.
ValueError
If both the on-disk and to-append matrices are not of the same format i.e., `csr` or `csc`.
OverflowError
If the underlying data store has a 32 bit indptr, and the new matrix is too large to fit in it i.e., would cause a 64 bit `indptr` to be written.
AssertionError
If the on-disk data does not have `csc` or `csr` format.
"""
# Prep variables
shape = self.shape
if isinstance(sparse_matrix, BaseCompressedSparseDataset):
Expand Down Expand Up @@ -546,7 +549,7 @@
)
# Clear cached property
if hasattr(self, "indptr"):
del self.indptr
del self._indptr

Check warning on line 552 in src/anndata/_core/sparse_dataset.py

View check run for this annotation

Codecov / codecov/patch

src/anndata/_core/sparse_dataset.py#L552

Added line #L552 was not covered by tests

# indices
indices = self.group["indices"]
Expand All @@ -555,7 +558,7 @@
indices[orig_data_size:] = sparse_matrix.indices

@cached_property
def indptr(self) -> np.ndarray:
def _indptr(self) -> np.ndarray:
"""\
Other than `data` and `indices`, this is only as long as the major axis

Expand All @@ -569,21 +572,29 @@
mtx = format_class(self.shape, dtype=self.dtype)
mtx.data = self.group["data"]
mtx.indices = self.group["indices"]
mtx.indptr = self.indptr
mtx.indptr = self._indptr
return mtx

def to_memory(self) -> ss.csr_matrix | ss.csc_matrix:
def to_memory(self) -> spmatrix | SpArray:
"""Returns an in-memory representation of the sparse matrix.

Returns
-------
The in-memory representation of the sparse matrix.
"""
format_class = get_memory_class(self.format)
mtx = format_class(self.shape, dtype=self.dtype)
mtx.data = self.group["data"][...]
mtx.indices = self.group["indices"][...]
mtx.indptr = self.indptr
mtx.indptr = self._indptr
return mtx


_sparse_dataset_doc = """\
On disk {format} sparse matrix.

Analogous to :class:`h5py.Dataset` or :class:`zarr.core.Array`, but for sparse matrices.

Parameters
----------
group
Expand Down Expand Up @@ -662,30 +673,3 @@
@_subset.register(BaseCompressedSparseDataset)
def subset_sparsedataset(d, subset_idx):
return d[subset_idx]


## Backwards compat

_sparsedataset_depr_msg = """\
SparseDataset is deprecated and will be removed in late 2024. It has been replaced by the public classes CSRDataset and CSCDataset.

For instance checks, use `isinstance(X, (anndata.CSRDataset, anndata.CSCDataset))` instead.

For creation, use `anndata.experimental.sparse_dataset(X)` instead.
"""


class SparseDataset(ABC):
"""DEPRECATED.

Use CSRDataset, CSCDataset, and sparse_dataset from anndata.experimental instead.
"""

def __new__(cls, group):
warnings.warn(FutureWarning(_sparsedataset_depr_msg), stacklevel=2)
return sparse_dataset(group)

@classmethod
def __subclasshook__(cls, C):
warnings.warn(FutureWarning(_sparsedataset_depr_msg), stacklevel=3)
return issubclass(C, (CSRDataset, CSCDataset))
2 changes: 1 addition & 1 deletion tests/test_backed_sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -549,7 +549,7 @@ def test_append_overflow_check(group_fn, sparse_class, tmpdir):
backed = sparse_dataset(group["mtx"])

# Checking for correct caching behaviour
backed.indptr
backed._indptr

with pytest.raises(
OverflowError,
Expand Down
34 changes: 0 additions & 34 deletions tests/test_deprecations.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
import h5py
import numpy as np
import pytest
import zarr
from scipy import sparse

import anndata as ad
Expand Down Expand Up @@ -129,39 +128,6 @@ def test_deprecated_read(tmp_path):
assert_equal(memory, from_disk)


def test_deprecated_sparse_dataset_values():
import zarr

from anndata.experimental import sparse_dataset

mtx = sparse.random(50, 50, format="csr")
g = zarr.group()

ad.write_elem(g, "mtx", mtx)
mtx_backed = sparse_dataset(g["mtx"])

with pytest.warns(FutureWarning, match=r"Please use .to_memory()"):
mtx_backed.value

with pytest.warns(FutureWarning, match=r"Please use .format"):
mtx_backed.format_str


def test_deprecated_sparse_dataset():
from anndata._core.sparse_dataset import SparseDataset

mem_X = sparse.random(50, 50, format="csr")
g = zarr.group()
ad.write_elem(g, "X", mem_X)
with pytest.warns(FutureWarning, match=r"SparseDataset is deprecated"):
X = SparseDataset(g["X"])

assert isinstance(X, ad.CSRDataset)

with pytest.warns(FutureWarning, match=r"SparseDataset is deprecated"):
assert isinstance(X, SparseDataset)


@pytest.mark.parametrize("name", anndata.experimental._DEPRECATED)
def test_warn_on_import_from_experimental(name: str):
with pytest.warns(FutureWarning, match=rf"Importing {name}"):
Expand Down
Loading