diff --git a/.azure-pipelines.yml b/.azure-pipelines.yml
index 0d9496423..afc1b4153 100644
--- a/.azure-pipelines.yml
+++ b/.azure-pipelines.yml
@@ -18,14 +18,14 @@ jobs:
python.version: "3.12"
RUN_COVERAGE: yes
TEST_TYPE: "coverage"
- Python3.9:
- python.version: "3.9"
+ Python3.10:
+ python.version: "3.10"
PreRelease:
python.version: "3.12"
DEPENDENCIES_VERSION: "pre-release"
TEST_TYPE: "strict-warning"
minimum_versions:
- python.version: "3.9"
+ python.version: "3.10"
DEPENDENCIES_VERSION: "minimum"
TEST_TYPE: "coverage"
steps:
@@ -88,6 +88,7 @@ jobs:
inputs:
codeCoverageTool: Cobertura
summaryFileLocation: "test-data/coverage.xml"
+ failIfCoverageEmpty: true
condition: eq(variables['TEST_TYPE'], 'coverage')
- task: PublishTestResults@2
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index f46beb094..b7355b6b5 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -25,7 +25,7 @@ jobs:
ASV_DIR: "./benchmarks"
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
with:
fetch-depth: 0
@@ -33,18 +33,20 @@ jobs:
if: ${{ github.ref_name != 'main' }}
# Errors on main branch
- - uses: mamba-org/setup-micromamba@v1
+ - uses: mamba-org/setup-micromamba@v2
with:
environment-name: asv
cache-environment: true
+ # Deps documented in https://asv.readthedocs.io/en/latest/installing.html
+ # libmambapy upper bound: https://github.com/airspeed-velocity/asv/issues/1438
create-args: >-
- python=3.11
+ python=${{ matrix.python }}
asv
- mamba
- packaging
+ libmambapy<2
+ conda-build
- name: Cache datasets
- uses: actions/cache@v3
+ uses: actions/cache@v4
with:
path: |
~/.cache
diff --git a/.github/workflows/test-gpu.yml b/.github/workflows/test-gpu.yml
index 4283ac780..97b2c689c 100644
--- a/.github/workflows/test-gpu.yml
+++ b/.github/workflows/test-gpu.yml
@@ -51,10 +51,20 @@ jobs:
- name: Nvidia SMI sanity check
run: nvidia-smi
+ - name: Install yq
+ run: |
+ sudo snap install yq
+
+ - name: Extract max Python version from classifiers
+ run: |
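+ # list the "Programming Language :: Python :: X.Y" classifiers and keep only the version numbers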
+ classifiers=$(yq .project.classifiers pyproject.toml -oy | grep --only-matching --perl-regexp '(?<=Python :: )(\d\.\d+)')
+ max_version=$(echo "$classifiers" | sort -V | tail -1)
+ echo "max_python_version=$max_version" >> $GITHUB_ENV
+
- name: Install Python
uses: actions/setup-python@v5
with:
- python-version: "3.x"
+ python-version: ${{ env.max_python_version }}
- name: Install UV
uses: hynek/setup-cached-uv@v2
@@ -75,3 +85,10 @@ jobs:
token: ${{ secrets.CODECOV_TOKEN }}
fail_ci_if_error: true
verbose: true
+
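+ # runs regardless of the job outcome (if: always()) so the trigger label is always removed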
+ - name: Remove 'run-gpu-ci' Label
+ if: always()
+ uses: actions-ecosystem/action-remove-labels@v1
+ with:
+ labels: "run-gpu-ci"
+ github_token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 2fba1b508..8b6fd222d 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
- rev: v0.6.2
+ rev: v0.7.3
hooks:
- id: ruff
types_or: [python, pyi, jupyter]
@@ -14,7 +14,7 @@ repos:
exclude_types:
- markdown
- repo: https://github.com/pre-commit/pre-commit-hooks
- rev: v4.6.0
+ rev: v5.0.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
diff --git a/.readthedocs.yml b/.readthedocs.yml
index 764eb57bd..8fa840e28 100644
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -3,6 +3,13 @@ build:
os: ubuntu-20.04
tools:
python: "3.12"
+ jobs:
+ post_checkout:
+ # unshallow so version can be derived from tag
+ - git fetch --unshallow || true
+ pre_build:
+ # run towncrier to preview the next version’s release notes
+ - ( find docs/release-notes -regex '[^.]+[.][^.]+.md' | grep -q . ) && towncrier build --keep || true
sphinx:
configuration: docs/conf.py
fail_on_warning: true # do not change or you will be fired
diff --git a/README.md b/README.md
index af784833a..c7ba77866 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
[![PyPI](https://img.shields.io/pypi/v/anndata.svg)](https://pypi.org/project/anndata)
[![Downloads](https://static.pepy.tech/badge/anndata/month)](https://pepy.tech/project/anndata)
[![Downloads](https://static.pepy.tech/badge/anndata)](https://pepy.tech/project/anndata)
-[![Stars](https://img.shields.io/github/stars/scverse/anndata?logo=GitHub&color=yellow)](https://github.com/scverse/anndata/stargazers)
+[![Stars](https://img.shields.io/github/stars/scverse/anndata?style=flat&logo=github&color=yellow)](https://github.com/scverse/anndata/stargazers)
[![Powered by NumFOCUS](https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A)](http://numfocus.org)
+## Public API
+
+Our public API is documented in the [API section][] of these docs.
+We cannot guarantee the stability of our internal APIs, whether that means the location of a function, its arguments, or something else.
+In other words, we do not officially support (or encourage) usage like `from anndata._core import AnnData`, as `_core` is undocumented and has a [leading underscore][].
+However, we are aware that [many users do use these internal APIs][] and encourage them to migrate to the public API or [open an issue][].
+That is, if something you rely on is missing from our documented public API, for example a feature you would like exported publicly, please open an issue.
+
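+For example, prefer the first, documented import below; the second reaches into an internal module and may break without notice (shown purely as an illustration):
+
+```python
+from anndata.io import read_elem            # public, documented
+# from anndata._io.specs import read_elem   # internal, unsupported; may move or change
+```
+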
+[api section]: https://anndata.readthedocs.io/en/stable/api.html
+[leading underscore]: https://peps.python.org/pep-0008/#public-and-internal-interfaces
+[many users do use these internal APIs]: https://github.com/search?q=%22anndata._io%22&type=code
+[open an issue]: https://github.com/scverse/anndata/issues/new/choose
+
+
## Citation
-If you use `anndata` in your work, please cite the `anndata` pre-print as follows:
+If you use `anndata` in your work, please cite the `anndata` publication as follows:
> **anndata: Annotated data**
>
> Isaac Virshup, Sergei Rybakov, Fabian J. Theis, Philipp Angerer, F. Alexander Wolf
>
-> _bioRxiv_ 2021 Dec 19. doi: [10.1101/2021.12.16.473007](https://doi.org/10.1101/2021.12.16.473007).
+> _JOSS_ 2024 Sep 16. doi: [10.21105/joss.04371](https://doi.org/10.21105/joss.04371).
You can cite the scverse publication as follows:
diff --git a/benchmarks/benchmarks/sparse_dataset.py b/benchmarks/benchmarks/sparse_dataset.py
index 7d217d159..3a6d0dac6 100644
--- a/benchmarks/benchmarks/sparse_dataset.py
+++ b/benchmarks/benchmarks/sparse_dataset.py
@@ -5,7 +5,8 @@
from scipy import sparse
from anndata import AnnData
-from anndata.experimental import sparse_dataset, write_elem
+from anndata._core.sparse_dataset import sparse_dataset
+from anndata._io.specs import write_elem
def make_alternating_mask(n):
diff --git a/ci/scripts/min-deps.py b/ci/scripts/min-deps.py
index b5b0b980e..c6bac0cf4 100755
--- a/ci/scripts/min-deps.py
+++ b/ci/scripts/min-deps.py
@@ -1,4 +1,4 @@
-#!python3
+#!/usr/bin/env python3
from __future__ import annotations
import argparse
diff --git a/ci/scripts/towncrier_automation.py b/ci/scripts/towncrier_automation.py
new file mode 100755
index 000000000..17fd10902
--- /dev/null
+++ b/ci/scripts/towncrier_automation.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python3
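+"""Build the release notes for a given version with towncrier and open a PR with the result.
+
+Normally invoked through ``hatch run towncrier:build X.Y.Z`` (see docs/contributing.md);
+it can also be run directly as ``python ci/scripts/towncrier_automation.py X.Y.Z [--dry-run]``.
+"""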
+from __future__ import annotations
+
+import argparse
+import subprocess
+from typing import TYPE_CHECKING
+
+from packaging.version import Version
+
+if TYPE_CHECKING:
+ from collections.abc import Sequence
+
+
+class Args(argparse.Namespace):
+ version: str
+ dry_run: bool
+
+
+def parse_args(argv: Sequence[str] | None = None) -> Args:
+ parser = argparse.ArgumentParser(
+ prog="towncrier-automation",
+ description=(
+ "This script runs towncrier for a given version, "
+ "creates a branch off of the current one, "
+ "and then creates a PR into the original branch with the changes. "
+ "The PR will be backported to main if the current branch is not main."
+ ),
+ )
+ parser.add_argument(
+ "version",
+ type=str,
+ help=(
+ "The new version for the release. It must have at least three parts, like `major.minor.patch`, not just `major.minor`. "
+ "It can have a suffix like `major.minor.patch.dev0` or `major.minor.0rc1`."
+ ),
+ )
+ parser.add_argument(
+ "--dry-run",
+ help="Whether or not to dry-run the actual creation of the pull request",
+ action="store_true",
+ )
+ args = parser.parse_args(argv, Args())
+ # validate the version
+ if len(Version(args.version).release) != 3:
+ msg = f"Version argument {args.version} must contain major, minor, and patch version."
+ raise ValueError(msg)
+ return args
+
+
+def main(argv: Sequence[str] | None = None) -> None:
+ args = parse_args(argv)
+
+ # Run towncrier
+ subprocess.run(
+ ["towncrier", "build", f"--version={args.version}", "--yes"], check=True
+ )
+
+ # Check if we are on the main branch to know if we need to backport
+ base_branch = subprocess.run(
+ ["git", "rev-parse", "--abbrev-ref", "HEAD"],
+ capture_output=True,
+ text=True,
+ check=True,
+ ).stdout.strip()
+ pr_description = "" if base_branch == "main" else "@meeseeksdev backport to main"
+ branch_name = f"release_notes_{args.version}"
+
+ # Create a new branch + commit
+ subprocess.run(["git", "switch", "-c", branch_name], check=True)
+ subprocess.run(["git", "add", "docs/release-notes"], check=True)
+ pr_title = f"(chore): generate {args.version} release notes"
+ subprocess.run(["git", "commit", "-m", pr_title], check=True)
+
+ # push
+ if not args.dry_run:
+ subprocess.run(
+ ["git", "push", "--set-upstream", "origin", branch_name], check=True
+ )
+ else:
+ print("Dry run, not pushing")
+
+ # Create a PR
+ subprocess.run(
+ [
+ "gh",
+ "pr",
+ "create",
+ f"--base={base_branch}",
+ f"--title={pr_title}",
+ f"--body={pr_description}",
+ "--label=skip-gpu-ci",
+ *(["--label=no milestone"] if base_branch == "main" else []),
+ *(["--dry-run"] if args.dry_run else []),
+ ],
+ check=True,
+ )
+
+ # Enable auto-merge
+ if not args.dry_run:
+ subprocess.run(
+ ["gh", "pr", "merge", branch_name, "--auto", "--squash"], check=True
+ )
+ else:
+ print("Dry run, not merging")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/docs/api.md b/docs/api.md
index 92139fe06..60cbbf61c 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -15,7 +15,8 @@ The central class:
## Combining
-Combining AnnData objects. See also the section on concatenation.
+Combining {class}`AnnData` objects.
+See also the section on concatenation.
```{eval-rst}
.. autosummary::
@@ -26,44 +27,67 @@ Combining AnnData objects. See also the section on concatenation.
## Reading
-Reading anndata’s native file format `.h5ad`.
+Reading anndata’s native formats `.h5ad` and `zarr`.
```{eval-rst}
.. autosummary::
:toctree: generated/
- read_h5ad
+ io.read_h5ad
+ io.read_zarr
```
-Reading other file formats.
+Reading individual portions ({attr}`~AnnData.obs`, {attr}`~AnnData.varm` etc.) of the {class}`AnnData` object.
```{eval-rst}
.. autosummary::
:toctree: generated/
- read_csv
- read_excel
- read_hdf
- read_loom
- read_mtx
- read_text
- read_umi_tools
- read_zarr
+ io.read_elem
+ io.sparse_dataset
+```
+
+Reading file formats that cannot represent all aspects of {class}`AnnData` objects.
+
+```{tip}
+You might have more success by assembling the {class}`AnnData` object yourself from the individual parts.
+```
+
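+A minimal sketch of that approach (the file names are purely illustrative):
+
+```python
+import pandas as pd
+import anndata as ad
+
+adata = ad.io.read_mtx("counts.mtx")              # expression matrix only
+adata.obs = pd.read_csv("obs.csv", index_col=0)   # per-observation annotations
+adata.var = pd.read_csv("var.csv", index_col=0)   # per-variable annotations
+```
+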
+```{eval-rst}
+.. autosummary::
+ :toctree: generated/
+ io.read_csv
+ io.read_excel
+ io.read_hdf
+ io.read_loom
+ io.read_mtx
+ io.read_text
+ io.read_umi_tools
```
## Writing
-Writing to anndata’s native file format `.h5ad`.
+Writing a complete {class}`AnnData` object to disk in anndata’s native formats `.h5ad` and `zarr`.
```{eval-rst}
.. autosummary::
:toctree: generated/
AnnData.write
+ AnnData.write_zarr
+```
+
+Writing individual portions ({attr}`~AnnData.obs`, {attr}`~AnnData.varm` etc.) of the {class}`AnnData` object.
+
+```{eval-rst}
+.. autosummary::
+ :toctree: generated/
+
+ io.write_elem
```
-Writing to other formats.
+Writing to file formats that cannot represent all aspects of {class}`AnnData` objects.
```{eval-rst}
.. autosummary::
@@ -71,7 +95,6 @@ Writing to other formats.
AnnData.write_csvs
AnnData.write_loom
- AnnData.write_zarr
```
(experimental-api)=
@@ -79,10 +102,10 @@ Writing to other formats.
## Experimental API
```{warning}
-API's in the experimental module are currently in development and subject to change at any time.
+APIs in the experimental module are currently in development and subject to change at any time.
```
-Two classes for working with batched access to collections of many `AnnData` objects or `h5ad` files.
+Two classes for working with batched access to collections of many {class}`AnnData` objects or `.h5ad` files.
In particular, for pytorch-based models.
```{eval-rst}
@@ -93,17 +116,6 @@ In particular, for pytorch-based models.
experimental.AnnLoader
```
-Interface for accessing on-disk sparse data:
-
-```{eval-rst}
-.. autosummary::
- :toctree: generated/
-
- experimental.sparse_dataset
- experimental.CSRDataset
- experimental.CSCDataset
-```
-
Out of core concatenation
```{eval-rst}
@@ -113,14 +125,12 @@ Out of core concatenation
experimental.concat_on_disk
```
-Low level methods for reading and writing elements of an `AnnData` object to a store:
+Low level methods for reading and writing elements of an {class}`AnnData` object to a store:
```{eval-rst}
.. autosummary::
:toctree: generated/
- experimental.read_elem
- experimental.write_elem
experimental.read_elem_as_dask
```
@@ -141,8 +151,6 @@ Types used by the former:
:toctree: generated/
experimental.IOSpec
- experimental.InMemoryElem
- experimental.RWAble
experimental.Read
experimental.Write
experimental.ReadCallback
@@ -168,3 +176,16 @@ Types used by the former:
settings
settings.override
```
+
+## Custom Types/Classes for Readable/Writeable Elements
+
+```{eval-rst}
+.. autosummary::
+ :toctree: generated/
+
+ abc.CSRDataset
+ abc.CSCDataset
+ typing.Index
+ typing.AxisStorable
+ typing.RWAble
+```
diff --git a/docs/benchmark-read-write.ipynb b/docs/benchmark-read-write.ipynb
index 886bfa0f6..365a585ec 100644
--- a/docs/benchmark-read-write.ipynb
+++ b/docs/benchmark-read-write.ipynb
@@ -159,7 +159,7 @@
],
"source": [
"%%time\n",
- "adata = ad.read_loom(\"test.loom\")"
+ "adata = ad.io.read_loom(\"test.loom\")"
]
}
],
diff --git a/docs/concatenation.rst b/docs/concatenation.rst
index be644dceb..ce6547b66 100644
--- a/docs/concatenation.rst
+++ b/docs/concatenation.rst
@@ -54,8 +54,8 @@ When the variables present in the objects to be concatenated aren't exactly the
This is otherwise called taking the `"inner"` (intersection) or `"outer"` (union) join.
For example, given two anndata objects with differing variables:
- >>> a = AnnData(sparse.eye(3), var=pd.DataFrame(index=list("abc")))
- >>> b = AnnData(sparse.eye(2), var=pd.DataFrame(index=list("ba")))
+ >>> a = AnnData(sparse.eye(3, format="csr"), var=pd.DataFrame(index=list("abc")))
+ >>> b = AnnData(sparse.eye(2, format="csr"), var=pd.DataFrame(index=list("ba")))
>>> ad.concat([a, b], join="inner").X.toarray()
array([[1., 0.],
[0., 1.],
@@ -208,11 +208,11 @@ Note that comparisons are made after indices are aligned.
That is, if the objects only share a subset of indices on the alternative axis, it's only required that values for those indices match when using a strategy like `"same"`.
>>> a = AnnData(
- ... sparse.eye(3),
+ ... sparse.eye(3, format="csr"),
... var=pd.DataFrame({"nums": [1, 2, 3]}, index=list("abc"))
... )
>>> b = AnnData(
- ... sparse.eye(2),
+ ... sparse.eye(2, format="csr"),
... var=pd.DataFrame({"nums": [2, 1]}, index=list("ba"))
... )
>>> ad.concat([a, b], merge="same").var
diff --git a/docs/conf.py b/docs/conf.py
index 6a0006a70..f98fe5ba7 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -13,7 +13,8 @@
from sphinx.application import Sphinx
HERE = Path(__file__).parent
-sys.path[:0] = [str(HERE / "extensions")]
+_extension_dir = HERE / "extensions"
+sys.path[:0] = [str(_extension_dir)]
# -- General configuration ------------------------------------------------
@@ -61,11 +62,8 @@
"sphinx.ext.linkcode",
"nbsphinx",
"IPython.sphinxext.ipython_console_highlighting",
- "patch_sphinx_toolbox_autoprotocol", # internal extension
"sphinx_toolbox.more_autodoc.autoprotocol",
- # other internal extensions
- "patch_myst_cite",
- "release_notes",
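+ # internal extensions: automatically pick up every module in docs/extensions/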
+ *(p.stem for p in _extension_dir.glob("*.py")),
]
myst_enable_extensions = [
"html_image", # So README.md can be used on github and sphinx docs
@@ -112,6 +110,10 @@
("py:class", "awkward.highlevel.Array"),
("py:class", "anndata._core.sparse_dataset.BaseCompressedSparseDataset"),
("py:obj", "numpy._typing._array_like._ScalarType_co"),
+ # https://github.com/sphinx-doc/sphinx/issues/10974
+ ("py:class", "numpy.int64"),
+ # https://github.com/tox-dev/sphinx-autodoc-typehints/issues/498
+ ("py:class", "types.EllipsisType"),
]
@@ -121,17 +123,17 @@ def setup(app: Sphinx):
intersphinx_mapping = dict(
- h5py=("https://docs.h5py.org/en/latest/", None),
- hdf5plugin=("https://hdf5plugin.readthedocs.io/en/latest/", None),
- loompy=("https://linnarssonlab.org/loompy/", None),
- numpy=("https://numpy.org/doc/stable/", None),
- pandas=("https://pandas.pydata.org/pandas-docs/stable/", None),
+ h5py=("https://docs.h5py.org/en/latest", None),
+ hdf5plugin=("https://hdf5plugin.readthedocs.io/en/latest", None),
+ loompy=("https://linnarssonlab.org/loompy", None),
+ numpy=("https://numpy.org/doc/stable", None),
+ pandas=("https://pandas.pydata.org/pandas-docs/stable", None),
python=("https://docs.python.org/3", None),
- scipy=("https://docs.scipy.org/doc/scipy/", None),
- sklearn=("https://scikit-learn.org/stable/", None),
- zarr=("https://zarr.readthedocs.io/en/stable/", None),
- xarray=("https://xarray.pydata.org/en/stable/", None),
- dask=("https://docs.dask.org/en/stable/", None),
+ scipy=("https://docs.scipy.org/doc/scipy", None),
+ sklearn=("https://scikit-learn.org/stable", None),
+ zarr=("https://zarr.readthedocs.io/en/stable", None),
+ xarray=("https://docs.xarray.dev/en/stable", None),
+ dask=("https://docs.dask.org/en/stable", None),
)
qualname_overrides = {
"h5py._hl.group.Group": "h5py.Group",
@@ -142,13 +144,12 @@ def setup(app: Sphinx):
"anndata._types.WriteCallback": "anndata.experimental.WriteCallback",
"anndata._types.Read": "anndata.experimental.Read",
"anndata._types.Write": "anndata.experimental.Write",
- "anndata._types.RWAble": "anndata.experimental.RWAble",
}
autodoc_type_aliases = dict(
NDArray=":data:`~numpy.typing.NDArray`",
- RWAble=":data:`~anndata.experimental.RWAble`",
+ AxisStorable=":data:`~anndata.typing.AxisStorable`",
**{
- f"{v}variantInMemoryType": ":data:`~anndata.experimental.InMemoryElem`"
+ f"{v}variantRWAble": ":data:`~anndata.typing.RWAble`"
for v in ["In", "Co", "Contra"]
},
)
diff --git a/docs/contributing.md b/docs/contributing.md
index c45cc032e..d16020a4f 100644
--- a/docs/contributing.md
+++ b/docs/contributing.md
@@ -6,6 +6,21 @@ AnnData follows the development practices outlined in the [Scanpy contribution g
.. include:: _key_contributors.rst
```
+## Release Notes
+
+AnnData differs from `scanpy` (for now) in how its releases are done.
+It uses [towncrier][] to build its changelog.
+We have set up some automation around this process.
+To run `towncrier`, create a PR with the compiled changelog into the base branch of the release, and backport it to `main` if needed (i.e., when the base branch is something like `0.10.x`), run:
+
+```shell
+hatch run towncrier:build X.Y.Z
+```
+
+You may add the option `--dry-run` at the end to perform the local steps without pushing to GitHub; the PR creation is then mocked via [`gh pr create --dry-run`](https://cli.github.com/manual/gh_pr_create).
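+
+For example, a dry run of the command above:
+
+```shell
+hatch run towncrier:build X.Y.Z --dry-run
+```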
+
+[towncrier]: https://towncrier.readthedocs.io/en/stable/
+
## CI
### GPU CI
diff --git a/docs/extensions/no_skip_abc_members.py b/docs/extensions/no_skip_abc_members.py
new file mode 100644
index 000000000..66846e095
--- /dev/null
+++ b/docs/extensions/no_skip_abc_members.py
@@ -0,0 +1,28 @@
+"""Sphinx extension to not skip abstract methods."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from typing import Literal
+
+ from sphinx.application import Sphinx
+ from sphinx.ext.autodoc import Options
+
+
+def autodoc_skip_member(
+ app: Sphinx,
+ what: Literal["module", "class", "exception", "function", "method", "attribute"],
+ name: str,
+ obj: object,
+ skip: bool,
+ options: Options,
+):
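+ # Returning False tells autodoc to document the member; returning None defers to other handlers.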
+ if what == "method" and getattr(obj, "__isabstractmethod__", False):
+ return False
+ return None
+
+
+def setup(app: Sphinx):
+ app.connect("autodoc-skip-member", autodoc_skip_member)
diff --git a/docs/extensions/patch_sphinx_toolbox_autoprotocol.py b/docs/extensions/patch_sphinx_toolbox_autoprotocol.py
deleted file mode 100644
index bafe24cc4..000000000
--- a/docs/extensions/patch_sphinx_toolbox_autoprotocol.py
+++ /dev/null
@@ -1,43 +0,0 @@
-from __future__ import annotations
-
-from typing import TYPE_CHECKING
-
-from sphinx.ext.autodoc import ObjectMember
-from sphinx_toolbox.more_autodoc.autoprotocol import ProtocolDocumenter
-
-if TYPE_CHECKING:
- from typing import Self
-
- from sphinx.application import Sphinx
-
-
-def patch_sphinx_toolbox_autoprotocol():
- """Compat hack: https://github.com/sphinx-toolbox/sphinx-toolbox/issues/168"""
-
- class ObjectMemberCompat(ObjectMember):
- @classmethod
- def from_other(cls, other: ObjectMember) -> Self:
- return cls(
- other.__name__,
- other.object,
- docstring=other.docstring,
- class_=other.class_,
- skipped=other.skipped,
- )
-
- def __iter__(self):
- return iter([self.__name__, self.object])
-
- filter_orig = ProtocolDocumenter.filter_members
-
- def filter_members(
- self, members: list[ObjectMember], want_all: bool
- ) -> list[tuple[str, object, bool]]:
- member_tuples = [ObjectMemberCompat.from_other(m) for m in members]
- return filter_orig(self, member_tuples, want_all)
-
- ProtocolDocumenter.filter_members = filter_members
-
-
-def setup(_app: Sphinx) -> None:
- patch_sphinx_toolbox_autoprotocol()
diff --git a/docs/extensions/release_notes.py b/docs/extensions/release_notes.py
deleted file mode 100644
index bb28453a7..000000000
--- a/docs/extensions/release_notes.py
+++ /dev/null
@@ -1,111 +0,0 @@
-from __future__ import annotations
-
-import itertools
-import re
-from pathlib import Path
-from typing import TYPE_CHECKING
-
-from docutils import nodes
-from packaging.version import Version
-from sphinx.util.docutils import SphinxDirective
-
-if TYPE_CHECKING:
- from collections.abc import Iterable, Sequence
- from typing import ClassVar
-
- from myst_parser.mdit_to_docutils.base import DocutilsRenderer
- from sphinx.application import Sphinx
-
-
-FULL_VERSION_RE = re.compile(r"^(\d+)\.(\d+)\.(\d+)$")
-
-
-class ReleaseNotes(SphinxDirective):
- required_arguments: ClassVar = 1
-
- def run(self) -> Sequence[nodes.Node]:
- dir_ = Path(self.arguments[0])
- # resolve relative dir
- if not dir_.is_absolute():
- src_file = Path(self.get_source_info()[0])
- if not src_file.is_file():
- msg = f"Cannot find relative path to: {src_file}"
- raise self.error(msg)
- dir_ = src_file.parent / self.arguments[0]
- if not dir_.is_dir():
- msg = f"Not a directory: {dir_}"
- raise self.error(msg)
-
- versions = sorted(
- (
- (Version(f.stem), f)
- for f in dir_.iterdir()
- if FULL_VERSION_RE.match(f.stem)
- ),
- reverse=True, # descending
- )
- version_groups = itertools.groupby(
- versions, key=lambda vf: (vf[0].major, vf[0].minor)
- )
- for (major, minor), versions in version_groups:
- self.render_version_group(major, minor, versions)
- return []
-
- def render_version_group(
- self, major: int, minor: int, versions: Iterable[tuple[Version, Path]]
- ) -> None:
- target = nodes.target(
- ids=[f"v{major}-{minor}"],
- names=[f"v{major}.{minor}"],
- )
- section = nodes.section(
- "",
- nodes.title("", f"Version {major}.{minor}"),
- ids=[],
- names=[f"version {major}.{minor}"],
- )
- self.state.document.note_implicit_target(section)
- self.state.document.note_explicit_target(target)
- # append target and section to parent
- self.renderer.current_node.append(target)
- self.renderer.update_section_level_state(section, 2)
- # append children to section
- with self.renderer.current_node_context(section):
- for _, p in versions:
- self.render_include(p)
-
- def render_include(self, path: Path) -> None:
- # hacky solution because of https://github.com/executablebooks/MyST-Parser/issues/967
- from docutils.parsers.rst.directives.misc import Include
- from myst_parser.mocking import MockIncludeDirective
-
- srcfile, lineno = self.get_source_info()
- parent_dir = Path(srcfile).parent
-
- d = MockIncludeDirective(
- renderer=self.renderer,
- name=type(self).__name__,
- klass=Include, # type: ignore # wrong type hint
- arguments=[str(path.relative_to(parent_dir))],
- options={},
- body=[],
- lineno=lineno,
- )
- d.run()
-
- # TODO: replace the above with this once the above mentioned bug is fixed
- # from sphinx.util.parsing import nested_parse_to_nodes
- # return nested_parse_to_nodes(
- # self.state,
- # path.read_text(),
- # source=str(path),
- # offset=self.content_offset,
- # )
-
- @property
- def renderer(self) -> DocutilsRenderer:
- return self.state._renderer
-
-
-def setup(app: Sphinx) -> None:
- app.add_directive("release-notes", ReleaseNotes)
diff --git a/docs/fileformat-prose.md b/docs/fileformat-prose.md
index 3fdc68788..831b441e9 100644
--- a/docs/fileformat-prose.md
+++ b/docs/fileformat-prose.md
@@ -476,7 +476,7 @@ That is, we store an indicator array (or mask) of null values alongside the arra
:sync: hdf5
```python
->>> from anndata.experimental import write_elem
+>>> from anndata.io import write_elem
>>> null_store = h5py.File("tmp.h5", mode="w")
>>> int_array = pd.array([1, None, 3, 4])
>>> int_array
@@ -498,7 +498,7 @@ nullable_integer/values
:sync: zarr
```python
->>> from anndata.experimental import write_elem
+>>> from anndata.io import write_elem
>>> null_store = zarr.open()
>>> int_array = pd.array([1, None, 3, 4])
>>> int_array
@@ -635,7 +635,7 @@ function:
```python
>>> import awkward as ak
->>> from anndata.experimental import read_elem
+>>> from anndata.io import read_elem
>>> awkward_group = store["varm/transcript"]
>>> ak.from_buffers(
... awkward_group.attrs["form"],
diff --git a/docs/release-notes/0.10.0.md b/docs/release-notes/0.10.0.md
index a4ed8a826..586850969 100644
--- a/docs/release-notes/0.10.0.md
+++ b/docs/release-notes/0.10.0.md
@@ -13,7 +13,7 @@
* Concatenate on-disk anndata objects with {func}`anndata.experimental.concat_on_disk` {pr}`955` {user}`selmanozleyen`
* AnnData can now hold dask arrays with `scipy.sparse.spmatrix` chunks {pr}`1114` {user}`ivirshup`
-* Public API for interacting with on disk sparse arrays: {func}`~anndata.experimental.sparse_dataset`, {class}`~anndata.experimental.CSRDataset`, and {class}`~anndata.experimental.CSCDataset` {pr}`765` {user}`ilan-gold` {user}`ivirshup`
+* Public API for interacting with on disk sparse arrays: {func}`~anndata.io.sparse_dataset`, {class}`~anndata.abc.CSRDataset`, and {class}`~anndata.abc.CSCDataset` {pr}`765` {user}`ilan-gold` {user}`ivirshup`
* Improved performance for simple slices of OOC sparse arrays {pr}`1131` {user}`ivirshup`
**Improved errors and warnings**
@@ -37,7 +37,7 @@
#### Deprecations
-* Deprecate `anndata.read`, which was just an alias for {func}`anndata.read_h5ad` {pr}`1108` {user}`ivirshup`.
+* Deprecate `anndata.read`, which was just an alias for {func}`anndata.io.read_h5ad` {pr}`1108` {user}`ivirshup`.
* `dtype` argument to `AnnData` constructor is now deprecated {pr}`1153` {user}`ivirshup`
#### Bug fixes
diff --git a/docs/release-notes/0.10.1.md b/docs/release-notes/0.10.1.md
index dae8af856..858d2f5fd 100644
--- a/docs/release-notes/0.10.1.md
+++ b/docs/release-notes/0.10.1.md
@@ -1,6 +1,6 @@
(v0.10.1)=
### 0.10.1 {small}`2023-10-08`
-#### Bugfix
+#### Bug fixes
* Fix `ad.concat` erroring when concatenating a categorical and object column {pr}`1171` {user}`ivirshup`
diff --git a/docs/release-notes/0.10.2.md b/docs/release-notes/0.10.2.md
index ea878abcf..e2b8b36fd 100644
--- a/docs/release-notes/0.10.2.md
+++ b/docs/release-notes/0.10.2.md
@@ -1,10 +1,10 @@
(v0.10.2)=
### 0.10.2 {small}`2023-10-11`
-#### Bugfix
+#### Bug fixes
* Added compatibility layer for packages relying on `anndata._core.sparse_dataset.SparseDataset`.
- Note that this API is *deprecated* and new code should use {class}`~anndata.experimental.CSRDataset`, {class}`~anndata.experimental.CSCDataset`, and {func}`~anndata.experimental.sparse_dataset` instead.
+ Note that this API is *deprecated* and new code should use `anndata.abc.CSRDataset`, `anndata.abc.CSCDataset`, and `anndata.io.sparse_dataset` instead.
{pr}`1185` {user}`ivirshup`
* Handle deprecation warning from `pd.Categorical.map` thrown during `anndata.concat` {pr}`1189` {user}`flying-sheep` {user}`ivirshup`
* Fixed extra steps being included in IO tracebacks {pr}`1193` {user}`flying-sheep`
diff --git a/docs/release-notes/0.10.3.md b/docs/release-notes/0.10.3.md
index 4e5918d40..022b61050 100644
--- a/docs/release-notes/0.10.3.md
+++ b/docs/release-notes/0.10.3.md
@@ -1,7 +1,7 @@
(v0.10.3)=
### 0.10.3 {small}`2023-10-31`
-#### Bugfix
+#### Bug fixes
* Prevent pandas from causing infinite recursion when setting a slice of a categorical column {pr}`1211` {user}`flying-sheep`
#### Documentation
diff --git a/docs/release-notes/0.10.4.md b/docs/release-notes/0.10.4.md
index 46ec222a9..592593163 100644
--- a/docs/release-notes/0.10.4.md
+++ b/docs/release-notes/0.10.4.md
@@ -1,7 +1,7 @@
(v0.10.4)=
### 0.10.4 {small}`2024-01-04`
-#### Bugfix
+#### Bug fixes
* Only try to use `Categorical.map(na_action=…)` in actually supported Pandas ≥2.1 {pr}`1226` {user}`flying-sheep`
* `AnnData.__sizeof__()` support for backed datasets {pr}`1230` {user}`Neah-Ko`
* `adata[:, []]` now returns an `AnnData` object empty on the appropriate dimensions instead of erroring {pr}`1243` {user}`ilan-gold`
diff --git a/docs/release-notes/0.10.5.md b/docs/release-notes/0.10.5.md
index edb7db10b..8987285b7 100644
--- a/docs/release-notes/0.10.5.md
+++ b/docs/release-notes/0.10.5.md
@@ -1,7 +1,7 @@
(v0.10.5)=
### 0.10.5 {small}`2024-01-25`
-#### Bugfix
+#### Bug fixes
* Fix outer concatenation along variables when only a subset of objects had an entry in layers {pr}`1291` {user}`ivirshup`
* Fix comparison of >2d arrays in `uns` during concatenation {pr}`1300` {user}`ivirshup`
diff --git a/docs/release-notes/0.10.6.md b/docs/release-notes/0.10.6.md
index 4bef8f562..e26fdf49d 100644
--- a/docs/release-notes/0.10.6.md
+++ b/docs/release-notes/0.10.6.md
@@ -1,7 +1,7 @@
(v0.10.6)=
### 0.10.6 {small}`2024-03-11`
-#### Bugfix
+#### Bug fixes
* Defer import of zarr in test helpers, as scanpy CI job relies on them {pr}`1343` {user}`ilan-gold`
* Writing a dataframe with non-unique column names now throws an error, instead of silently overwriting {pr}`1335` {user}`ivirshup`
diff --git a/docs/release-notes/0.10.7.md b/docs/release-notes/0.10.7.md
index f3ea34cd0..1832b95a4 100644
--- a/docs/release-notes/0.10.7.md
+++ b/docs/release-notes/0.10.7.md
@@ -1,7 +1,7 @@
(v0.10.7)=
### 0.10.7 {small}`2024-04-09`
-#### Bugfix
+#### Bug fixes
* Handle upstream `numcodecs` bug where read-only string arrays cannot be encoded {user}`ivirshup` {pr}`1421`
* Use in-memory sparse matrix directly to fix compatibility with `scipy` `1.13` {user}`ilan-gold` {pr}`1435`
diff --git a/docs/release-notes/0.10.8.md b/docs/release-notes/0.10.8.md
index 324c9d571..d69102d51 100644
--- a/docs/release-notes/0.10.8.md
+++ b/docs/release-notes/0.10.8.md
@@ -1,12 +1,12 @@
(v0.10.8)=
### 0.10.8 {small}`2024-06-20`
-#### Bugfix
+#### Bug fixes
* Write out `64bit` indptr when appropriate for {func}`~anndata.experimental.concat_on_disk` {pr}`1493` {user}`ilan-gold`
* Support for Numpy 2 {pr}`1499` {user}`flying-sheep`
-* Fix {func}`~anndata.experimental.sparse_dataset` docstring test on account of new {mod}`scipy` version {pr}`1514` {user}`ilan-gold`
+* Fix {func}`~anndata.io.sparse_dataset` docstring test on account of new {mod}`scipy` version {pr}`1514` {user}`ilan-gold`
#### Documentation
-* Improved example for {func}`~anndata.experimental.sparse_dataset` {pr}`1468` {user}`ivirshup`
+* Improved example for {func}`~anndata.io.sparse_dataset` {pr}`1468` {user}`ivirshup`
diff --git a/docs/release-notes/0.10.9.md b/docs/release-notes/0.10.9.md
index 012e5e89f..ae0aadb54 100644
--- a/docs/release-notes/0.10.9.md
+++ b/docs/release-notes/0.10.9.md
@@ -1,7 +1,7 @@
(v0.10.9)=
### 0.10.9 {small}`2024-08-28`
-#### Bugfix
+#### Bug fixes
- Fix writing large number of columns for `h5` files {user}`ilan-gold` {user}`selmanozleyen` ({pr}`1147`)
- Add warning for setting `X` on a view with repeated indices {user}`ilan-gold` ({pr}`1501`)
@@ -16,7 +16,7 @@
- create new `cupy` installation options for cuda 11 & 12 called `cu11` and `cu12` {user}`Intron7` ({pr}`1596`)
-#### Doc
+#### Documentation
- add `callback` typing for {func}`~anndata.experimental.read_dispatched` and {func}`~anndata.experimental.write_dispatched` {user}`ilan-gold` ({pr}`1557`)
diff --git a/docs/release-notes/0.11.0.md b/docs/release-notes/0.11.0.md
new file mode 100644
index 000000000..317175f50
--- /dev/null
+++ b/docs/release-notes/0.11.0.md
@@ -0,0 +1,49 @@
+(v0.11.0)=
+### 0.11.0 {small}`2024-11-07`
+
+Release candidates:
+
+- (v0.11.0rc3)=
+ {guilabel}`rc3` 2024-10-14
+- (v0.11.0rc2)=
+ {guilabel}`rc2` 2024-09-24
+- (v0.11.0rc1)=
+ {guilabel}`rc1` 2024-09-04
+
+#### Bug fixes
+
+- Ensure {func}`anndata.concat` of {class}`~anndata.AnnData` objects with {class}`scipy.sparse.spmatrix` and {class}`scipy.sparse.sparray` dask arrays uses the correct fill value of 0. {user}`ilan-gold` ({pr}`1719`)
+- Ensure that views of AwkwardArrays have their "view" attributes removed on saving an {class}`~anndata.AnnData` object to disk. {user}`grst` ({pr}`1736`)
+
+#### Breaking changes
+
+- {guilabel}`rc3` Drop support for `python` 3.9 {user}`ilan-gold` ({pr}`1712`)
+- {guilabel}`rc2` A new `anndata.io` module contains all `read_*` and `write_*` functions, and all imports of such functions should go through this module. Old ways of importing these functions, i.e., `from anndata import read_csv` or `from anndata._io.specs import read_elem`, will still work but are now considered deprecated and warn on import, with the exception of {func}`anndata.io.read_zarr` and {func}`anndata.io.read_h5ad`, which remain importable from the top-level `anndata` without a warning. {user}`ilan-gold` ({pr}`1682`)
+- {guilabel}`rc1` Removed deprecated modules `anndata.core` and `anndata.readwrite` {user}`ivirshup` ({pr}`1197`)
+- {guilabel}`rc1` No longer export `sparse_dataset` from `anndata.experimental`, instead exporting {func}`anndata.io.sparse_dataset` {user}`ilan-gold` ({pr}`1642`)
+- {guilabel}`rc1` Move `RWAble` and `InMemoryElem` out of `experimental`, renaming `RWAble` to {type}`~anndata.typing.AxisStorable` and `InMemoryElem` to {type}`~anndata.typing.RWAble` {user}`ilan-gold` ({pr}`1643`)
+
+#### Development Process
+
+- {guilabel}`rc2` Add a `dask` installation extra, i.e., `pip install anndata[dask]` {user}`ilan-gold` ({pr}`1677`)
+- {guilabel}`rc2` Remove `shall_` from variable names in `settings` {user}`ilan-gold` ({pr}`1685`)
+- {guilabel}`rc1` Create new `cupy` installation options for cuda 11 & 12 called `cu11` and `cu12` {user}`Intron7` ({pr}`1596`)
+
+#### Documentation
+
+- {guilabel}`rc1` Correct the {attr}`anndata.AnnData.X` type to include {class}`~anndata.abc.CSRDataset` and {class}`~anndata.abc.CSCDataset` as possible types and begin the deprecation process for non-CSR/CSC {class}`scipy.sparse.spmatrix` types in {attr}`anndata.AnnData.X` {user}`ilan-gold` ({pr}`1616`)
+
+#### Features
+
+- Add support for ellipsis indexing of the {class}`~anndata.AnnData` object {user}`ilan-gold` ({pr}`1729`)
+- {guilabel}`rc1` `scipy.sparse.csr_array` and `scipy.sparse.csc_array` are now supported when constructing `AnnData` objects {user}`ilan-gold` {user}`isaac-virshup` ({pr}`1028`)
+- {guilabel}`rc1` Allow `axis` parameter of e.g. {func}`anndata.concat` to accept `'obs'` and `'var'` {user}`flying-sheep` ({pr}`1244`)
+- {guilabel}`rc1` Add `settings` object with methods for altering internally-used options, like checking for uniqueness on `obs`' index {user}`ilan-gold` ({pr}`1270`)
+- {guilabel}`rc1` Add {attr}`~anndata.settings.remove_unused_categories` option to {attr}`anndata.settings` to override current behavior {user}`ilan-gold` ({pr}`1340`)
+- {guilabel}`rc1` Add {func}`~anndata.experimental.read_elem_as_dask` function to handle i/o with sparse and dense arrays {user}`ilan-gold` ({pr}`1469`)
+- {guilabel}`rc1` Add ability to convert strings to categoricals on write in {meth}`~anndata.AnnData.write_h5ad` and {meth}`~anndata.AnnData.write_zarr` via `convert_strings_to_categoricals` parameter {user}`falexwolf` ({pr}`1474`)
+- {guilabel}`rc1` Add {attr}`~anndata.settings.check_uniqueness` option to {attr}`anndata.settings` to override current behavior {user}`ilan-gold` ({pr}`1507`)
+- {guilabel}`rc1` Add functionality to write from GPU {class}`dask.array.Array` to disk {user}`ilan-gold` ({pr}`1550`)
+- {guilabel}`rc1` Read and write support for nullable string arrays ({class}`pandas.arrays.StringArray`). Use pandas’ {doc}`pandas:user_guide/options` `mode.string_storage` to control which storage mode is used when reading `dtype="string"` columns. {user}`flying-sheep` ({pr}`1558`)
+- {guilabel}`rc1` Export {func}`~anndata.io.write_elem` and {func}`~anndata.io.read_elem` directly from the main package instead of `experimental` {user}`ilan-gold` ({pr}`1598`)
+- {guilabel}`rc1` Allow reading sparse data (via {func}`~anndata.io.read_elem` or {func}`~anndata.io.sparse_dataset`) into either {class}`scipy.sparse.csr_array` or {class}`scipy.sparse.csc_array` via {attr}`anndata.settings.use_sparse_array_on_read` {user}`ilan-gold` ({pr}`1633`)
diff --git a/docs/release-notes/0.11.1.md b/docs/release-notes/0.11.1.md
new file mode 100644
index 000000000..8725ecf23
--- /dev/null
+++ b/docs/release-notes/0.11.1.md
@@ -0,0 +1,8 @@
+(v0.11.1)=
+### 0.11.1 {small}`2024-11-12`
+
+#### Bug fixes
+
+- Remove upper pin on `dask` and exclude versions broken with sparse indexing {user}`ilan-gold` ({pr}`1725`)
+- Fix chunking with -1 in `chunks` argument of {func}`~anndata.experimental.read_elem_as_dask` {user}`ilan-gold` ({pr}`1743`)
+- Fix `cupy<0.13` imports in non-gpu environments {user}`ilan-gold` ({pr}`1754`)
diff --git a/docs/release-notes/0.5.0.md b/docs/release-notes/0.5.0.md
index 554a7e5f7..d22e1bb24 100644
--- a/docs/release-notes/0.5.0.md
+++ b/docs/release-notes/0.5.0.md
@@ -5,6 +5,6 @@
- automatically remove unused categories after slicing
- read/write [.loom](https://loompy.org) files using loompy 2
- fixed read/write for a few text file formats
-- read [UMI tools] files: {func}`~anndata.read_umi_tools`
+- read [UMI tools] files: {func}`~anndata.io.read_umi_tools`
[umi tools]: https://github.com/CGATOxford/UMI-tools
diff --git a/docs/release-notes/0.6.x.md b/docs/release-notes/0.6.x.md
index a16984372..340343c7a 100644
--- a/docs/release-notes/0.6.x.md
+++ b/docs/release-notes/0.6.x.md
@@ -15,9 +15,9 @@
`0.6.16` {smaller}`A Wolf`
- maintain dtype upon copy.
`0.6.13` {smaller}`A Wolf`
-- {attr}`~anndata.AnnData.layers` inspired by [.loom](https://loompy.org) files allows their information lossless reading via {func}`~anndata.read_loom`.
+- {attr}`~anndata.AnnData.layers`, inspired by [.loom](https://loompy.org) files, allows lossless reading of their information via {func}`~anndata.io.read_loom`.
`0.6.7`–`0.6.9` {pr}`46` & {pr}`48` {smaller}`S Rybakov`
-- support for reading zarr files: {func}`~anndata.read_zarr`
+- support for reading zarr files: {func}`~anndata.io.read_zarr`
`0.6.7` {pr}`38` {smaller}`T White`
- initialization from pandas DataFrames
`0.6.` {smaller}`A Wolf`
diff --git a/docs/release-notes/0.7.6.md b/docs/release-notes/0.7.6.md
index 5bd2779ab..2dd2e54d1 100644
--- a/docs/release-notes/0.7.6.md
+++ b/docs/release-notes/0.7.6.md
@@ -1,7 +1,7 @@
(v0.7.6)=
### 0.7.6 {small}`11 April, 2021`
-#### New features
+#### Features
- Added {meth}`anndata.AnnData.to_memory` for returning an in memory object from a backed one {pr}`470` {pr}`542` {smaller}`V Bergen` {smaller}`I Virshup`
- {meth}`anndata.AnnData.write_loom` now writes `obs_names` and `var_names` using the `Index`'s `.name` attribute, if set {pr}`538` {smaller}`I Virshup`
@@ -18,5 +18,5 @@
#### Deprecations
-- Passing positional arguments to {func}`anndata.read_loom` besides the path is now deprecated {pr}`538` {smaller}`I Virshup`
-- {func}`anndata.read_loom` arguments `obsm_names` and `varm_names` are now deprecated in favour of `obsm_mapping` and `varm_mapping` {pr}`538` {smaller}`I Virshup`
+- Passing positional arguments to {func}`anndata.io.read_loom` besides the path is now deprecated {pr}`538` {smaller}`I Virshup`
+- {func}`anndata.io.read_loom` arguments `obsm_names` and `varm_names` are now deprecated in favour of `obsm_mapping` and `varm_mapping` {pr}`538` {smaller}`I Virshup`
diff --git a/docs/release-notes/0.8.0.md b/docs/release-notes/0.8.0.md
index 0bda4382d..ee5967a9c 100644
--- a/docs/release-notes/0.8.0.md
+++ b/docs/release-notes/0.8.0.md
@@ -15,14 +15,14 @@ This should make it much easier to support new datatypes, use partial access, an
- Each element should be tagged with an `encoding_type` and `encoding_version`. See updated docs on the {doc}`file format `
- Support for nullable integer and boolean data arrays. More data types to come!
-- Experimental support for low level access to the IO API via {func}`~anndata.experimental.read_elem` and {func}`~anndata.experimental.write_elem`
+- Experimental support for low level access to the IO API via {func}`~anndata.io.read_elem` and {func}`~anndata.io.write_elem`
#### Features
- Added PyTorch dataloader {class}`~anndata.experimental.AnnLoader` and lazy concatenation object {class}`~anndata.experimental.AnnCollection`. See the [tutorials] {pr}`416` {smaller}`S Rybakov`
- Compatibility with `h5ad` files written from Julia {pr}`569` {smaller}`I Kats`
- Many logging messages that should have been warnings are now warnings {pr}`650` {smaller}`I Virshup`
-- Significantly more efficient {func}`anndata.read_umi_tools` {pr}`661` {smaller}`I Virshup`
+- Significantly more efficient {func}`anndata.io.read_umi_tools` {pr}`661` {smaller}`I Virshup`
- Fixed deepcopy of a copy of a view retaining sparse matrix view mixin type {pr}`670` {smaller}`M Klein`
- In many cases {attr}`~anndata.AnnData.X` can now be `None` {pr}`463` {smaller}`R Cannoodt` {pr}`677` {smaller}`I Virshup`. Remaining work is documented in {issue}`467`.
- Removed hard `xlrd` dependency {smaller}`I Virshup`
diff --git a/docs/release-notes/0.9.0.md b/docs/release-notes/0.9.0.md
index d38c7f78c..3481ade4c 100644
--- a/docs/release-notes/0.9.0.md
+++ b/docs/release-notes/0.9.0.md
@@ -21,7 +21,7 @@
- {doc}`/interoperability`: new page on interoperability with other packages {pr}`831` {user}`ivirshup`
-- Expanded docstring more documentation for `backed` argument of {func}`anndata.read_h5ad` {pr}`812` {user}`jeskowagner`
+- Expanded the docstring documentation for the `backed` argument of {func}`anndata.io.read_h5ad` {pr}`812` {user}`jeskowagner`
- Documented how to use alternative compression methods for the `h5ad` file format, see {meth}`AnnData.write_h5ad() ` {pr}`857` {user}`nigeil`
diff --git a/docs/release-notes/0.9.1.md b/docs/release-notes/0.9.1.md
index f90672d60..383085122 100644
--- a/docs/release-notes/0.9.1.md
+++ b/docs/release-notes/0.9.1.md
@@ -1,6 +1,6 @@
(v0.9.1)=
### 0.9.1 {small}`2023-04-11`
-#### Bugfix
+#### Bug fixes
* Fixing windows support {pr}`958` {user}`Koncopd`
diff --git a/docs/release-notes/0.9.2.md b/docs/release-notes/0.9.2.md
index 286f43b3d..de88c29a8 100644
--- a/docs/release-notes/0.9.2.md
+++ b/docs/release-notes/0.9.2.md
@@ -1,9 +1,9 @@
(v0.9.2)=
### 0.9.2 {small}`2023-07-25`
-#### Bugfix
+#### Bug fixes
* Views of `awkward.Array`s now work with `awkward>=2.3` {pr}`1040` {user}`ivirshup`
* Fix ufuncs of views like `adata.X[:10].cov(axis=0)` returning views {pr}`1043` {user}`flying-sheep`
* Fix instantiating AnnData where `.X` is a `DataFrame` with an integer valued index {pr}`1002` {user}`flying-sheep`
-* Fix {func}`~anndata.read_zarr` when used on `zarr.Group` {pr}`1057` {user}`ivirshup`
+* Fix {func}`~anndata.io.read_zarr` when used on `zarr.Group` {pr}`1057` {user}`ivirshup`
diff --git a/docs/release-notes/1028.feature.md b/docs/release-notes/1028.feature.md
deleted file mode 100644
index 8b2f612f9..000000000
--- a/docs/release-notes/1028.feature.md
+++ /dev/null
@@ -1 +0,0 @@
-`scipy.sparse.csr_array` and `scipy.sparse.csc_array` are now supported when constructing `AnnData` objects {user}`ilan-gold` {user}`isaac-virshup`
diff --git a/docs/release-notes/1197.breaking.md b/docs/release-notes/1197.breaking.md
deleted file mode 100644
index 165e712cc..000000000
--- a/docs/release-notes/1197.breaking.md
+++ /dev/null
@@ -1 +0,0 @@
-Removed deprecated modules `anndata.core` and `anndata.readwrite` {user}`ivirshup`
diff --git a/docs/release-notes/1244.feature.md b/docs/release-notes/1244.feature.md
deleted file mode 100644
index 9a0fd6c30..000000000
--- a/docs/release-notes/1244.feature.md
+++ /dev/null
@@ -1 +0,0 @@
-Allow `axis` parameter of e.g. :func:`anndata.concat` to accept `'obs'` and `'var'` {user}`flying-sheep`
diff --git a/docs/release-notes/1270.feature.md b/docs/release-notes/1270.feature.md
deleted file mode 100644
index 89f07264c..000000000
--- a/docs/release-notes/1270.feature.md
+++ /dev/null
@@ -1 +0,0 @@
-Add `settings` object with methods for altering internally-used options, like checking for uniqueness on `obs`' index {user}`ilan-gold`
diff --git a/docs/release-notes/1340.feature.md b/docs/release-notes/1340.feature.md
deleted file mode 100644
index 8991d630f..000000000
--- a/docs/release-notes/1340.feature.md
+++ /dev/null
@@ -1 +0,0 @@
-Add {attr}`~anndata.settings.should_remove_unused_categories` option to {attr}`anndata.settings` to override current behavior {user}`ilan-gold`
diff --git a/docs/release-notes/1469.feature.md b/docs/release-notes/1469.feature.md
deleted file mode 100644
index abe84f7f6..000000000
--- a/docs/release-notes/1469.feature.md
+++ /dev/null
@@ -1 +0,0 @@
-Add :func:`~anndata.experimental.read_elem_as_dask` function to handle i/o with sparse and dense arrays {user}`ilan-gold`
diff --git a/docs/release-notes/1474.feature.md b/docs/release-notes/1474.feature.md
deleted file mode 100644
index 9c85d982f..000000000
--- a/docs/release-notes/1474.feature.md
+++ /dev/null
@@ -1 +0,0 @@
-Add ability to convert strings to categoricals on write in {meth}`~anndata.AnnData.write_h5ad` and {meth}`~anndata.AnnData.write_zarr` via `convert_strings_to_categoricals` parameter {user}` falexwolf`
diff --git a/docs/release-notes/1507.feature.md b/docs/release-notes/1507.feature.md
deleted file mode 100644
index 13c6224ef..000000000
--- a/docs/release-notes/1507.feature.md
+++ /dev/null
@@ -1 +0,0 @@
-Add {attr}`~anndata.settings.should_check_uniqueness` option to {attr}`anndata.settings` to override current behavior {user}`ilan-gold`
diff --git a/docs/release-notes/1550.feature.md b/docs/release-notes/1550.feature.md
deleted file mode 100644
index bd1bfd37d..000000000
--- a/docs/release-notes/1550.feature.md
+++ /dev/null
@@ -1 +0,0 @@
-Add functionality to write from GPU {class}`dask.array.Array` to disk {user}`ilan-gold`
diff --git a/docs/release-notes/1596.dev.md b/docs/release-notes/1596.dev.md
deleted file mode 100644
index e1b3492ed..000000000
--- a/docs/release-notes/1596.dev.md
+++ /dev/null
@@ -1 +0,0 @@
-create new `cupy` installation options for cuda 11 & 12 called `cu11` and `cu12` {user}`Intron7`
diff --git a/hatch.toml b/hatch.toml
index ad888c3bb..738056567 100644
--- a/hatch.toml
+++ b/hatch.toml
@@ -4,8 +4,26 @@ features = ["dev"]
[envs.docs]
features = ["doc"]
-dependencies = ["setuptools"] # https://bitbucket.org/pybtex-devs/pybtex/issues/169
+extra-dependencies = ["setuptools"] # https://bitbucket.org/pybtex-devs/pybtex/issues/169
+scripts.build = "sphinx-build -M html docs docs/_build -W --keep-going {args}"
+scripts.clean = "git clean -fdX -- {args:docs}"
-[envs.docs.scripts]
-build = "sphinx-build -M html docs docs/_build -W --keep-going {args}"
-clean = "git clean -fX -- docs"
+[envs.towncrier]
+scripts.build = "python3 ci/scripts/towncrier_automation.py {args}"
+scripts.clean = "git restore --source=HEAD --staged --worktree -- docs/release-notes"
+
+[envs.hatch-test]
+default-args = []
+extra-dependencies = ["ipykernel"]
+features = ["dev", "test"]
+overrides.matrix.deps.env-vars = [
+ { key = "UV_PRERELEASE", value = "allow", if = ["pre"] },
+ { key = "UV_RESOLUTION", value = "lowest-direct", if = ["min"] },
+]
+overrides.matrix.deps.python = [
+ { if = ["min"], value = "3.10" },
+ { if = ["stable", "pre"], value = "3.12" },
+]
+
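+# Test matrix: "stable" (default pins), "pre" (pre-release dependencies), "min" (minimum supported dependencies)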
+[[envs.hatch-test.matrix]]
+deps = ["stable", "pre", "min"]
diff --git a/pyproject.toml b/pyproject.toml
index 44fa24401..3cc1b31a3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,7 @@ requires = ["hatchling", "hatch-vcs"]
[project]
name = "anndata"
description = "Annotated data."
-requires-python = ">=3.9"
+requires-python = ">=3.10"
license = "BSD-3-Clause"
authors = [
{ name = "Philipp Angerer" },
@@ -29,7 +29,6 @@ classifiers = [
"Operating System :: Microsoft :: Windows",
"Operating System :: POSIX :: Linux",
"Programming Language :: Python :: 3",
- "Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
@@ -43,7 +42,7 @@ dependencies = [
"numpy>=1.23",
# https://github.com/scverse/anndata/issues/1434
"scipy >1.8",
- "h5py>=3.1",
+ "h5py>=3.6",
"exceptiongroup; python_version<'3.11'",
"natsort",
"packaging>=20.0",
@@ -61,10 +60,8 @@ Home-page = "https://github.com/scverse/anndata"
[project.optional-dependencies]
dev = [
# dev version generation
- "setuptools_scm",
- # test speedups
- "pytest-xdist",
- "towncrier>=24.8.0",
+ "setuptools-scm",
+ "anndata[dev-doc,dev-test]",
]
doc = [
"sphinx>=7.4.6",
@@ -72,17 +69,20 @@ doc = [
"sphinx-autodoc-typehints>=2.2.0",
"sphinx-issues",
"sphinx-copybutton",
- "sphinx-toolbox",
+ "sphinx-toolbox>=3.8.0",
"sphinxext.opengraph",
"nbsphinx",
- "scanpydoc[theme,typehints] >=0.13.6",
+ "scanpydoc[theme,typehints] >=0.14.1",
"zarr",
"awkward>=2.0.7",
"IPython", # For syntax highlighting in notebooks
"myst_parser",
"sphinx_design>=0.5.0",
"readthedocs-sphinx-search",
+ # for unreleased changes
+ "anndata[dev-doc]",
]
+dev-doc = ["towncrier>=24.8.0"] # release notes tool
test = [
"loompy>=3.0.5",
"pytest>=8.2",
@@ -95,26 +95,34 @@ test = [
"boltons",
"scanpy",
"httpx", # For data downloading
- "dask[array,distributed]>=2022.09.2,<2024.8.0",
+ "dask[distributed]",
"awkward>=2.3",
"pyarrow",
"pytest_memray",
- "pytest-mock"
+ "pytest-mock",
+ "anndata[dask]",
]
+dev-test = ["pytest-xdist"] # local test speedups
gpu = ["cupy"]
cu12 = ["cupy-cuda12x"]
cu11 = ["cupy-cuda11x"]
+# https://github.com/dask/dask/issues/11290
+dask = ["dask[array]>=2022.09.2,!=2024.8.*,!=2024.9.*"]
[tool.hatch.version]
source = "vcs"
[tool.hatch.build.hooks.vcs]
version-file = "src/anndata/_version.py"
+raw-options.version_scheme = "release-branch-semver"
[tool.hatch.build.targets.wheel]
packages = ["src/anndata", "src/testing"]
[tool.coverage.run]
+data_file = "test-data/coverage"
source_pkgs = ["anndata"]
omit = ["src/anndata/_version.py", "**/test_*.py"]
+[tool.coverage.xml]
+output = "test-data/coverage.xml"
[tool.coverage.paths]
source = ["./src", "**/site-packages"]
@@ -165,12 +173,15 @@ select = [
"E", # Error detected by Pycodestyle
"F", # Errors detected by Pyflakes
"W", # Warning detected by Pycodestyle
+ "PLW", # Pylint
"UP", # pyupgrade
"I", # isort
"TCH", # manage type checking blocks
+ "TID", # Banned imports
"ICN", # Follow import conventions
"PTH", # Pathlib instead of os.path
"PT", # Pytest conventions
+ "PYI", # Typing
]
ignore = [
# line too long -> we accept long comment lines; formatter gets rid of long code lines
@@ -179,6 +190,10 @@ ignore = [
"E731",
# allow I, O, l as variable names -> I is the identity matrix, i, j, k, l is reasonable indexing notation
"E741",
+ # We use relative imports from parent modules
+ "TID252",
+ # Shadowing loop variables isn’t a big deal
+ "PLW2901",
]
[tool.ruff.lint.per-file-ignores]
# E721 comparing types, but we specifically are checking that we aren't getting subtypes (views)
@@ -186,6 +201,10 @@ ignore = [
[tool.ruff.lint.isort]
known-first-party = ["anndata"]
required-imports = ["from __future__ import annotations"]
+[tool.ruff.lint.flake8-tidy-imports.banned-api]
+"subprocess.call".msg = "Use `subprocess.run([…])` instead"
+"subprocess.check_call".msg = "Use `subprocess.run([…], check=True)` instead"
+"subprocess.check_output".msg = "Use `subprocess.run([…], check=True, capture_output=True)` instead"
[tool.ruff.lint.flake8-type-checking]
exempt-modules = []
strict = true
@@ -202,16 +221,10 @@ single_file = false
package_dir = "src"
issue_format = "{{pr}}`{issue}`"
title_format = "(v{version})=\n### {version} {{small}}`{project_date}`"
-[tool.towncrier.fragment.bugfix]
-[tool.towncrier.fragment.doc]
-[tool.towncrier.fragment.feature]
-[tool.towncrier.fragment.misc]
-
-[tool.towncrier.fragment.performance]
-name = "Performance"
-
-[tool.towncrier.fragment.breaking]
-name = "Breaking"
-
-[tool.towncrier.fragment.dev]
-name = "Development Process"
+fragment.bugfix.name = "Bug fixes"
+fragment.doc.name = "Documentation"
+fragment.feature.name = "Features"
+fragment.misc.name = "Miscellaneous improvements"
+fragment.performance.name = "Performance"
+fragment.breaking.name = "Breaking changes"
+fragment.dev.name = "Development Process"
diff --git a/src/anndata/__init__.py b/src/anndata/__init__.py
index c2006cd72..fec027c87 100644
--- a/src/anndata/__init__.py
+++ b/src/anndata/__init__.py
@@ -2,6 +2,11 @@
from __future__ import annotations
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from typing import Any
+
try: # See https://github.com/maresb/hatch-vcs-footgun-example
from setuptools_scm import get_version
@@ -24,17 +29,6 @@
from ._core.anndata import AnnData
from ._core.merge import concat
from ._core.raw import Raw
-from ._io import (
- read_csv,
- read_excel,
- read_h5ad,
- read_hdf,
- read_loom,
- read_mtx,
- read_text,
- read_umi_tools,
- read_zarr,
-)
from ._settings import settings
from ._warnings import (
ExperimentalFeatureWarning,
@@ -42,12 +36,14 @@
OldFormatWarning,
WriteWarning,
)
+from .io import read_h5ad, read_zarr
+from .utils import module_get_attr_redirect
-# Experimental needs to be imported last
-from . import experimental # isort: skip
+# Submodules need to be imported last
+from . import abc, experimental, typing, io # noqa: E402 isort: skip
# We use these in tests by attribute access
-from . import _io, logging # noqa: F401 isort: skip
+from . import logging # noqa: F401, E402 isort: skip
def read(*args, **kwargs):
@@ -61,12 +57,7 @@ def read(*args, **kwargs):
return read_h5ad(*args, **kwargs)
-__all__ = [
- "__version__",
- "AnnData",
- "concat",
- "Raw",
- "read_h5ad",
+_DEPRECATED_IO = (
"read_loom",
"read_hdf",
"read_excel",
@@ -74,11 +65,34 @@ def read(*args, **kwargs):
"read_csv",
"read_text",
"read_mtx",
+)
+_DEPRECATED = dict((method, f"io.{method}") for method in _DEPRECATED_IO)
+
+
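+# PEP 562 module-level __getattr__: redirects deprecated top-level names such as `read_csv` to their `anndata.io` equivalents.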
+def __getattr__(attr_name: str) -> Any:
+ return module_get_attr_redirect(attr_name, deprecated_mapping=_DEPRECATED)
+
+
+__all__ = [
+ # Attributes
+ "__version__",
+ "settings",
+ # Submodules
+ "abc",
+ "experimental",
+ "typing",
+ "io",
+ # Classes
+ "AnnData",
+ "Raw",
+ # Functions
+ "concat",
"read_zarr",
+ "read_h5ad",
+ "read",
+ # Warnings
"OldFormatWarning",
"WriteWarning",
"ImplicitModificationWarning",
"ExperimentalFeatureWarning",
- "experimental",
- "settings",
]
diff --git a/src/anndata/_core/aligned_mapping.py b/src/anndata/_core/aligned_mapping.py
index e2f6e4352..9df5ac977 100644
--- a/src/anndata/_core/aligned_mapping.py
+++ b/src/anndata/_core/aligned_mapping.py
@@ -5,7 +5,7 @@
from collections.abc import MutableMapping, Sequence
from copy import copy
from dataclasses import dataclass
-from typing import TYPE_CHECKING, Generic, TypeVar, Union
+from typing import TYPE_CHECKING, Generic, TypeVar
import numpy as np
import pandas as pd
@@ -33,10 +33,10 @@
from .raw import Raw
-OneDIdx = Union[Sequence[int], Sequence[bool], slice]
+OneDIdx = Sequence[int] | Sequence[bool] | slice
TwoDIdx = tuple[OneDIdx, OneDIdx]
# TODO: pd.DataFrame only allowed in AxisArrays?
-Value = Union[pd.DataFrame, spmatrix, np.ndarray]
+Value = pd.DataFrame | spmatrix | np.ndarray
P = TypeVar("P", bound="AlignedMappingBase")
"""Parent mapping an AlignedView is based on."""
@@ -376,9 +376,14 @@ class PairwiseArraysView(AlignedView[PairwiseArraysBase, OneDIdx], PairwiseArray
PairwiseArraysBase._actual_class = PairwiseArrays
-AlignedMapping = Union[
- AxisArrays, AxisArraysView, Layers, LayersView, PairwiseArrays, PairwiseArraysView
-]
+AlignedMapping = (
+ AxisArrays
+ | AxisArraysView
+ | Layers
+ | LayersView
+ | PairwiseArrays
+ | PairwiseArraysView
+)
T = TypeVar("T", bound=AlignedMapping)
"""Pair of types to be aligned."""
@@ -408,9 +413,7 @@ def fget(self) -> Callable[[], None]:
def fake(): ...
- fake.__annotations__ = {
- "return": Union[self.cls._actual_class, self.cls._view_class]
- }
+ fake.__annotations__ = {"return": self.cls._actual_class | self.cls._view_class}
return fake
def __get__(self, obj: None | AnnData, objtype: type | None = None) -> T:
diff --git a/src/anndata/_core/anndata.py b/src/anndata/_core/anndata.py
index 49d47665d..8a8eaf949 100644
--- a/src/anndata/_core/anndata.py
+++ b/src/anndata/_core/anndata.py
@@ -52,9 +52,10 @@
from os import PathLike
from typing import Any, Literal
+ from ..compat import Index1D
+ from ..typing import ArrayDataStructureType
from .aligned_mapping import AxisArraysView, LayersView, PairwiseArraysView
- from .index import Index, Index1D
- from .views import ArrayView
+ from .index import Index
# for backwards compat
@@ -134,15 +135,15 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):
See Also
--------
- read_h5ad
- read_csv
- read_excel
- read_hdf
- read_loom
- read_zarr
- read_mtx
- read_text
- read_umi_tools
+ io.read_h5ad
+ io.read_csv
+ io.read_excel
+ io.read_hdf
+ io.read_loom
+ io.read_zarr
+ io.read_mtx
+ io.read_text
+ io.read_umi_tools
Notes
-----
@@ -272,12 +273,12 @@ def _init_as_view(self, adata_ref: AnnData, oidx: Index, vidx: Index):
"that is, you cannot make a view of a view."
)
self._is_view = True
- if isinstance(oidx, (int, np.integer)):
+ if isinstance(oidx, int | np.integer):
if not (-adata_ref.n_obs <= oidx < adata_ref.n_obs):
raise IndexError(f"Observation index `{oidx}` is out of range.")
oidx += adata_ref.n_obs * (oidx < 0)
oidx = slice(oidx, oidx + 1, 1)
- if isinstance(vidx, (int, np.integer)):
+ if isinstance(vidx, int | np.integer):
if not (-adata_ref.n_vars <= vidx < adata_ref.n_vars):
raise IndexError(f"Variable index `{vidx}` is out of range.")
vidx += adata_ref.n_vars * (vidx < 0)
@@ -297,7 +298,7 @@ def _init_as_view(self, adata_ref: AnnData, oidx: Index, vidx: Index):
var_sub = adata_ref.var.iloc[vidx]
# fix categories
uns = copy(adata_ref._uns)
- if settings.should_remove_unused_categories:
+ if settings.remove_unused_categories:
self._remove_unused_categories(adata_ref.obs, obs_sub, uns)
self._remove_unused_categories(adata_ref.var, var_sub, uns)
# set attributes
@@ -406,7 +407,7 @@ def _init_as_actual(
# as in readwrite.read_10x_h5
if X.dtype != np.dtype(dtype):
X = X.astype(dtype)
- elif isinstance(X, (ZarrArray, DaskArray)):
+ elif isinstance(X, ZarrArray | DaskArray):
X = X.astype(dtype)
else: # is np.ndarray or a subclass, convert to true np.ndarray
X = np.asarray(X, dtype)
@@ -447,7 +448,7 @@ def _init_as_actual(
# Backwards compat for connectivities matrices in uns["neighbors"]
_move_adj_mtx({"uns": self._uns, "obsp": self._obsp})
self._check_dimensions()
- if settings.should_check_uniqueness:
+ if settings.check_uniqueness:
self._check_uniqueness()
if self.filename:
@@ -541,7 +542,7 @@ def shape(self) -> tuple[int, int]:
return self.n_obs, self.n_vars
@property
- def X(self) -> np.ndarray | sparse.spmatrix | SpArray | ArrayView | None:
+ def X(self) -> ArrayDataStructureType | None:
"""Data matrix of shape :attr:`n_obs` × :attr:`n_vars`."""
if self.isbacked:
if not self.file.is_open:
@@ -696,7 +697,7 @@ def raw(self) -> Raw:
The :attr:`raw` attribute is initialized with the current content
of an object by setting::
- adata.raw = adata
+ adata.raw = adata.copy()
Its content can be deleted::
@@ -763,16 +764,14 @@ def _prep_dim_index(self, value, attr: str) -> pd.Index:
raise ValueError(
f"Length of passed value for {attr}_names is {len(value)}, but this AnnData has shape: {self.shape}"
)
- if isinstance(value, pd.Index) and not isinstance(
- value.name, (str, type(None))
- ):
+ if isinstance(value, pd.Index) and not isinstance(value.name, str | type(None)):
raise ValueError(
f"AnnData expects .{attr}.index.name to be a string or None, "
f"but you passed a name of type {type(value.name).__name__!r}"
)
else:
value = pd.Index(value)
- if not isinstance(value.name, (str, type(None))):
+ if not isinstance(value.name, str | type(None)):
value.name = None
if (
len(value) > 0
@@ -1170,9 +1169,7 @@ def _inplace_subset_obs(self, index: Index1D):
self._init_as_actual(adata_subset)
# TODO: Update, possibly remove
- def __setitem__(
- self, index: Index, val: int | float | np.ndarray | sparse.spmatrix
- ):
+ def __setitem__(self, index: Index, val: float | np.ndarray | sparse.spmatrix):
if self.is_view:
raise ValueError("Object is view and cannot be accessed with `[]`.")
obs, var = self._normalize_indices(index)
@@ -1399,7 +1396,7 @@ def to_memory(self, copy=False) -> AnnData:
.. code:: python
import anndata
- backed = anndata.read_h5ad("file.h5ad", backed="r")
+ backed = anndata.io.read_h5ad("file.h5ad", backed="r")
mem = backed[backed.obs["cluster"] == "a", :].to_memory()
"""
new = {}
@@ -1444,7 +1441,7 @@ def copy(self, filename: PathLike | None = None) -> AnnData:
else:
return self._mutated_copy()
else:
- from .._io import read_h5ad, write_h5ad
+ from ..io import read_h5ad, write_h5ad
if filename is None:
raise ValueError(
@@ -1858,7 +1855,7 @@ def write_h5ad(
Sparse arrays in AnnData object to write as dense. Currently only
supports `X` and `raw/X`.
"""
- from .._io import write_h5ad
+ from ..io import write_h5ad
if filename is None and not self.isbacked:
raise ValueError("Provide a filename!")
@@ -1894,7 +1891,7 @@ def write_csvs(self, dirname: PathLike, skip_data: bool = True, sep: str = ","):
sep
Separator for the data.
"""
- from .._io import write_csvs
+ from ..io import write_csvs
write_csvs(dirname, self, skip_data=skip_data, sep=sep)
@@ -1907,7 +1904,7 @@ def write_loom(self, filename: PathLike, write_obsm_varm: bool = False):
filename
The filename.
"""
- from .._io import write_loom
+ from ..io import write_loom
write_loom(filename, self, write_obsm_varm=write_obsm_varm)
@@ -1926,7 +1923,7 @@ def write_zarr(
chunks
Chunk shape.
"""
- from .._io import write_zarr
+ from ..io import write_zarr
write_zarr(store, self, chunks=chunks)
@@ -1976,7 +1973,7 @@ def chunk_X(
if isinstance(select, int):
select = select if select < self.n_obs else self.n_obs
choice = np.random.choice(self.n_obs, select, replace)
- elif isinstance(select, (np.ndarray, Sequence)):
+ elif isinstance(select, np.ndarray | Sequence):
choice = np.asarray(select)
else:
raise ValueError("select should be int or array")
diff --git a/src/anndata/_core/index.py b/src/anndata/_core/index.py
index 6a5e2fc39..f1d72ce0d 100644
--- a/src/anndata/_core/index.py
+++ b/src/anndata/_core/index.py
@@ -26,14 +26,8 @@ def _normalize_indices(
if isinstance(index, pd.Series):
index: Index = index.values
if isinstance(index, tuple):
- if len(index) > 2:
- raise ValueError("AnnData can only be sliced in rows and columns.")
- # deal with pd.Series
# TODO: The series should probably be aligned first
- if isinstance(index[1], pd.Series):
- index = index[0], index[1].values
- if isinstance(index[0], pd.Series):
- index = index[0].values, index[1]
+ index = tuple(i.values if isinstance(i, pd.Series) else i for i in index)
ax0, ax1 = unpack_index(index)
ax0 = _normalize_index(ax0, names0)
ax1 = _normalize_index(ax1, names1)
@@ -70,25 +64,25 @@ def name_idx(i):
stop = None if stop is None else stop + 1
step = indexer.step
return slice(start, stop, step)
- elif isinstance(indexer, (np.integer, int)):
+ elif isinstance(indexer, np.integer | int):
return indexer
elif isinstance(indexer, str):
return index.get_loc(indexer) # int
elif isinstance(
- indexer, (Sequence, np.ndarray, pd.Index, spmatrix, np.matrix, SpArray)
+ indexer, Sequence | np.ndarray | pd.Index | spmatrix | np.matrix | SpArray
):
if hasattr(indexer, "shape") and (
(indexer.shape == (index.shape[0], 1))
or (indexer.shape == (1, index.shape[0]))
):
- if isinstance(indexer, (spmatrix, SpArray)):
+ if isinstance(indexer, spmatrix | SpArray):
indexer = indexer.toarray()
indexer = np.ravel(indexer)
- if not isinstance(indexer, (np.ndarray, pd.Index)):
+ if not isinstance(indexer, np.ndarray | pd.Index):
indexer = np.array(indexer)
if len(indexer) == 0:
indexer = indexer.astype(int)
- if issubclass(indexer.dtype.type, (np.integer, np.floating)):
+ if issubclass(indexer.dtype.type, np.integer | np.floating):
return indexer # Might not work for range indexes
elif issubclass(indexer.dtype.type, np.bool_):
if indexer.shape != index.shape:
@@ -107,8 +101,7 @@ def name_idx(i):
"are not valid obs/ var names or indices."
)
return positions # np.ndarray[int]
- else:
- raise IndexError(f"Unknown indexer {indexer!r} of type {type(indexer)}")
+ raise IndexError(f"Unknown indexer {indexer!r} of type {type(indexer)}")
def _fix_slice_bounds(s: slice, length: int) -> slice:
@@ -132,13 +125,28 @@ def _fix_slice_bounds(s: slice, length: int) -> slice:
def unpack_index(index: Index) -> tuple[Index1D, Index1D]:
if not isinstance(index, tuple):
+ if index is Ellipsis:
+ index = slice(None)
return index, slice(None)
- elif len(index) == 2:
+ num_ellipsis = sum(i is Ellipsis for i in index)
+ if num_ellipsis > 1:
+ raise IndexError("an index can only have a single ellipsis ('...')")
+ # If index has Ellipsis, filter it out (and if not, error)
+ if len(index) > 2:
+ if not num_ellipsis:
+ raise IndexError("Received a length 3 index without an ellipsis")
+ index = tuple(i for i in index if i is not Ellipsis)
return index
- elif len(index) == 1:
- return index[0], slice(None)
- else:
- raise IndexError("invalid number of indices")
+ # If index has Ellipsis, replace it with slice
+ if len(index) == 2:
+ index = tuple(slice(None) if i is Ellipsis else i for i in index)
+ return index
+ if len(index) == 1:
+ index = index[0]
+ if index is Ellipsis:
+ index = slice(None)
+ return index, slice(None)
+ raise IndexError("invalid number of indices")
@singledispatch
diff --git a/src/anndata/_core/merge.py b/src/anndata/_core/merge.py
index 85d5b31ca..0dfa5dab2 100644
--- a/src/anndata/_core/merge.py
+++ b/src/anndata/_core/merge.py
@@ -174,7 +174,7 @@ def equal_sparse(a, b) -> bool:
xp = array_api_compat.array_namespace(a.data)
- if isinstance(b, (CupySparseMatrix, sparse.spmatrix, SpArray)):
+ if isinstance(b, CupySparseMatrix | sparse.spmatrix | SpArray):
if isinstance(a, CupySparseMatrix):
# Comparison broken for CSC matrices
# https://github.com/cupy/cupy/issues/7757
@@ -206,7 +206,7 @@ def equal_awkward(a, b) -> bool:
def as_sparse(x, use_sparse_array=False):
- if not isinstance(x, (sparse.spmatrix, SpArray)):
+ if not isinstance(x, sparse.spmatrix | SpArray):
if CAN_USE_SPARSE_ARRAY and use_sparse_array:
return sparse.csr_array(x)
return sparse.csr_matrix(x)
@@ -536,7 +536,7 @@ def apply(self, el, *, axis, fill_value=None):
return el
if isinstance(el, pd.DataFrame):
return self._apply_to_df(el, axis=axis, fill_value=fill_value)
- elif isinstance(el, (sparse.spmatrix, SpArray, CupySparseMatrix)):
+ elif isinstance(el, sparse.spmatrix | SpArray | CupySparseMatrix):
return self._apply_to_sparse(el, axis=axis, fill_value=fill_value)
elif isinstance(el, AwkArray):
return self._apply_to_awkward(el, axis=axis, fill_value=fill_value)
@@ -723,7 +723,14 @@ def default_fill_value(els):
This is largely due to backwards compat, and might not be the ideal solution.
"""
- if any(isinstance(el, (sparse.spmatrix, SpArray)) for el in els):
+ if any(
+ isinstance(el, sparse.spmatrix | SpArray)
+ or (
+ isinstance(el, DaskArray)
+ and isinstance(el._meta, sparse.spmatrix | SpArray)
+ )
+ for el in els
+ ):
return 0
else:
return np.nan
@@ -737,8 +744,8 @@ def gen_reindexer(new_var: pd.Index, cur_var: pd.Index):
Usage
-----
- >>> a = AnnData(sparse.eye(3), var=pd.DataFrame(index=list("abc")))
- >>> b = AnnData(sparse.eye(2), var=pd.DataFrame(index=list("ba")))
+ >>> a = AnnData(sparse.eye(3, format="csr"), var=pd.DataFrame(index=list("abc")))
+ >>> b = AnnData(sparse.eye(2, format="csr"), var=pd.DataFrame(index=list("ba")))
>>> reindexer = gen_reindexer(a.var_names, b.var_names)
>>> sparse.vstack([a.X, reindexer(b.X)]).toarray()
array([[1., 0., 0.],
@@ -794,7 +801,7 @@ def concat_arrays(arrays, reindexers, axis=0, index=None, fill_value=None):
import cupyx.scipy.sparse as cpsparse
if not all(
- isinstance(a, (CupySparseMatrix, CupyArray)) or 0 in a.shape for a in arrays
+ isinstance(a, CupySparseMatrix | CupyArray) or 0 in a.shape for a in arrays
):
raise NotImplementedError(
"Cannot concatenate a cupy array with other array types."
@@ -821,7 +828,7 @@ def concat_arrays(arrays, reindexers, axis=0, index=None, fill_value=None):
],
axis=axis,
)
- elif any(isinstance(a, (sparse.spmatrix, SpArray)) for a in arrays):
+ elif any(isinstance(a, sparse.spmatrix | SpArray) for a in arrays):
sparse_stack = (sparse.vstack, sparse.hstack)[axis]
use_sparse_array = any(issubclass(type(a), SpArray) for a in arrays)
return sparse_stack(
@@ -980,7 +987,7 @@ def concat_pairwise_mapping(
els = [
m.get(k, sparse_class((s, s), dtype=bool)) for m, s in zip(mappings, shapes)
]
- if all(isinstance(el, (CupySparseMatrix, CupyArray)) for el in els):
+ if all(isinstance(el, CupySparseMatrix | CupyArray) for el in els):
result[k] = _cp_block_diag(els, format="csr")
elif all(isinstance(el, DaskArray) for el in els):
result[k] = _dask_block_diag(els)
diff --git a/src/anndata/_core/raw.py b/src/anndata/_core/raw.py
index 7237c06b4..d138440b5 100644
--- a/src/anndata/_core/raw.py
+++ b/src/anndata/_core/raw.py
@@ -40,7 +40,7 @@ def __init__(
# construct manually
if adata.isbacked == (X is None):
# Move from GPU to CPU since it's large and not always used
- if isinstance(X, (CupyArray, CupySparseMatrix)):
+ if isinstance(X, CupyArray | CupySparseMatrix):
self._X = X.get()
else:
self._X = X
@@ -51,7 +51,7 @@ def __init__(
self.varm = varm
elif X is None: # construct from adata
# Move from GPU to CPU since it's large and not always used
- if isinstance(adata.X, (CupyArray, CupySparseMatrix)):
+ if isinstance(adata.X, CupyArray | CupySparseMatrix):
self._X = adata.X.get()
else:
self._X = adata.X.copy()
@@ -124,9 +124,9 @@ def __getitem__(self, index):
oidx, vidx = self._normalize_indices(index)
# To preserve two dimensional shape
- if isinstance(vidx, (int, np.integer)):
+ if isinstance(vidx, int | np.integer):
vidx = slice(vidx, vidx + 1, 1)
- if isinstance(oidx, (int, np.integer)):
+ if isinstance(oidx, int | np.integer):
oidx = slice(oidx, oidx + 1, 1)
if not self._adata.isbacked:
diff --git a/src/anndata/_core/sparse_dataset.py b/src/anndata/_core/sparse_dataset.py
index 12a5ef19b..ae6b47c7f 100644
--- a/src/anndata/_core/sparse_dataset.py
+++ b/src/anndata/_core/sparse_dataset.py
@@ -26,26 +26,21 @@
import scipy.sparse as ss
from scipy.sparse import _sparsetools
-from anndata._core.index import _fix_slice_bounds
-from anndata.compat import H5Group, ZarrArray, ZarrGroup
-
-from ..compat import SpArray, _read_attr
-
-try:
- # Not really important, just for IDEs to be more helpful
- from scipy.sparse._compressed import _cs_matrix
-except ImportError:
- from scipy.sparse import spmatrix as _cs_matrix
-
-
-from .index import _subset, unpack_index
+from .. import abc
+from .._settings import settings
+from ..compat import H5Group, SpArray, ZarrArray, ZarrGroup, _read_attr
+from .index import _fix_slice_bounds, _subset, unpack_index
if TYPE_CHECKING:
from collections.abc import Sequence
from typing import Literal
+ from scipy.sparse._compressed import _cs_matrix
+
from .._types import GroupStorageType
from .index import Index
+else:
+ from scipy.sparse import spmatrix as _cs_matrix
class BackedFormat(NamedTuple):
@@ -234,6 +229,8 @@ def _get_sliceXarray(self, row: slice, col: Sequence[int]) -> ss.csc_matrix:
FORMATS = [
BackedFormat("csr", backed_csr_matrix, ss.csr_matrix),
BackedFormat("csc", backed_csc_matrix, ss.csc_matrix),
+ BackedFormat("csr", backed_csr_matrix, ss.csr_array),
+ BackedFormat("csc", backed_csc_matrix, ss.csc_array),
]
@@ -346,25 +343,19 @@ def _get_group_format(group: GroupStorageType) -> str:
def is_sparse_indexing_overridden(format: Literal["csr", "csc"], row, col):
major_indexer, minor_indexer = (row, col) if format == "csr" else (col, row)
return isinstance(minor_indexer, slice) and (
- (isinstance(major_indexer, (int, np.integer)))
+ (isinstance(major_indexer, int | np.integer))
or (isinstance(major_indexer, slice))
or (isinstance(major_indexer, np.ndarray) and major_indexer.ndim == 1)
)
-class BaseCompressedSparseDataset(ABC):
- """Analogous to :class:`h5py.Dataset ` or `zarr.Array`, but for sparse matrices."""
-
- format: Literal["csr", "csc"]
+class BaseCompressedSparseDataset(abc._AbstractCSDataset, ABC):
_group: GroupStorageType
def __init__(self, group: GroupStorageType):
type(self)._check_group_format(group)
self._group = group
- shape: tuple[int, int]
- """Shape of the matrix."""
-
@property
def group(self) -> GroupStorageType:
"""The group underlying the backed matrix."""
@@ -378,6 +369,7 @@ def group(self, val):
@property
def backend(self) -> Literal["zarr", "hdf5"]:
+ """Which file type is used on-disk."""
if isinstance(self.group, ZarrGroup):
return "zarr"
elif isinstance(self.group, H5Group):
@@ -387,6 +379,7 @@ def backend(self) -> Literal["zarr", "hdf5"]:
@property
def dtype(self) -> np.dtype:
+ """The :class:`numpy.dtype` of the `data` attribute of the sparse matrix."""
return self.group["data"].dtype
@classmethod
@@ -395,43 +388,26 @@ def _check_group_format(cls, group):
assert group_format == cls.format
@property
- def format_str(self) -> Literal["csr", "csc"]:
- """DEPRECATED Use .format instead."""
- warnings.warn(
- "The attribute .format_str is deprecated and will be removed in the anndata 0.11.0. "
- "Please use .format instead.",
- FutureWarning,
- )
- return self.format
-
- @property
- def name(self) -> str:
+ def _name(self) -> str:
+ """Name of the group."""
return self.group.name
@property
def shape(self) -> tuple[int, int]:
+ """Shape of the matrix read off disk."""
shape = _read_attr(self.group.attrs, "shape", None)
if shape is None:
# TODO warn
shape = self.group.attrs.get("h5sparse_shape")
return tuple(map(int, shape))
- @property
- def value(self) -> ss.csr_matrix | ss.csc_matrix:
- """DEPRECATED Use .to_memory() instead."""
- warnings.warn(
- "The .value attribute is deprecated and will be removed in the anndata 0.11.0. "
- "Please use .to_memory() instead.",
- FutureWarning,
- )
- return self.to_memory()
-
def __repr__(self) -> str:
- return f"{type(self).__name__}: backend {self.backend}, shape {self.shape}, data_dtype {self.dtype}"
+ name = type(self).__name__.removeprefix("_")
+ return f"{name}: backend {self.backend}, shape {self.shape}, data_dtype {self.dtype}"
def __getitem__(
self, index: Index | tuple[()]
- ) -> float | ss.csr_matrix | ss.csc_matrix:
+ ) -> float | ss.csr_matrix | ss.csc_matrix | SpArray:
indices = self._normalize_index(index)
row, col = indices
mtx = self._to_backed()
@@ -458,8 +434,15 @@ def __getitem__(
# If indexing is array x array it returns a backed_sparse_matrix
# Not sure what the performance is on that operation
- if isinstance(sub, BackedSparseMatrix):
- return get_memory_class(self.format)(sub)
+        # Also need to check whether the in-memory format is a sparse array rather than a matrix
+ mtx_fmt = get_memory_class(
+ self.format, use_sparray_in_io=settings.use_sparse_array_on_read
+ )
+ must_convert_to_array = issubclass(mtx_fmt, SpArray) and not isinstance(
+ sub, SpArray
+ )
+ if isinstance(sub, BackedSparseMatrix) or must_convert_to_array:
+ return mtx_fmt(sub)
else:
return sub
@@ -483,7 +466,25 @@ def __setitem__(self, index: Index | tuple[()], value) -> None:
mock_matrix[row, col] = value
# TODO: split to other classes?
- def append(self, sparse_matrix: _cs_matrix | SpArray) -> None:
+ def append(self, sparse_matrix: ss.csr_matrix | ss.csc_matrix | SpArray) -> None:
+ """Append an in-memory or on-disk sparse matrix to the current object's store.
+
+ Parameters
+ ----------
+ sparse_matrix
+ The matrix to append.
+
+ Raises
+ ------
+ NotImplementedError
+ If the matrix to append is not one of :class:`~scipy.sparse.csr_array`, :class:`~scipy.sparse.csc_array`, :class:`~scipy.sparse.csr_matrix`, or :class:`~scipy.sparse.csc_matrix`.
+ ValueError
+            If the on-disk and to-append matrices do not share the same format, i.e., `csr` or `csc`.
+ OverflowError
+            If the underlying data store has a 32 bit `indptr`, and the new matrix is too large to fit in it, i.e., appending would require a 64 bit `indptr` to be written.
+ AssertionError
+ If the on-disk data does not have `csc` or `csr` format.
+ """
# Prep variables
shape = self.shape
if isinstance(sparse_matrix, BaseCompressedSparseDataset):
@@ -546,7 +547,7 @@ def append(self, sparse_matrix: _cs_matrix | SpArray) -> None:
)
# Clear cached property
if hasattr(self, "indptr"):
- del self.indptr
+ del self._indptr
# indices
indices = self.group["indices"]
@@ -555,7 +556,7 @@ def append(self, sparse_matrix: _cs_matrix | SpArray) -> None:
indices[orig_data_size:] = sparse_matrix.indices
@cached_property
- def indptr(self) -> np.ndarray:
+ def _indptr(self) -> np.ndarray:
"""\
Other than `data` and `indices`, this is only as long as the major axis
@@ -569,39 +570,29 @@ def _to_backed(self) -> BackedSparseMatrix:
mtx = format_class(self.shape, dtype=self.dtype)
mtx.data = self.group["data"]
mtx.indices = self.group["indices"]
- mtx.indptr = self.indptr
+ mtx.indptr = self._indptr
return mtx
- def to_memory(self) -> ss.csr_matrix | ss.csc_matrix:
- format_class = get_memory_class(self.format)
+ def to_memory(self) -> ss.csr_matrix | ss.csc_matrix | SpArray:
+ format_class = get_memory_class(
+ self.format, use_sparray_in_io=settings.use_sparse_array_on_read
+ )
mtx = format_class(self.shape, dtype=self.dtype)
mtx.data = self.group["data"][...]
mtx.indices = self.group["indices"][...]
- mtx.indptr = self.indptr
+ mtx.indptr = self._indptr
return mtx
-_sparse_dataset_doc = """\
- On disk {format} sparse matrix.
+class _CSRDataset(BaseCompressedSparseDataset, abc.CSRDataset):
+ """Internal concrete version of :class:`anndata.abc.CSRDataset`."""
- Parameters
- ----------
- group
- The backing group store.
-"""
+class _CSCDataset(BaseCompressedSparseDataset, abc.CSCDataset):
+ """Internal concrete version of :class:`anndata.abc.CSRDataset`."""
-class CSRDataset(BaseCompressedSparseDataset):
- __doc__ = _sparse_dataset_doc.format(format="CSR")
- format = "csr"
-
-class CSCDataset(BaseCompressedSparseDataset):
- __doc__ = _sparse_dataset_doc.format(format="CSC")
- format = "csc"
-
-
-def sparse_dataset(group: GroupStorageType) -> CSRDataset | CSCDataset:
+def sparse_dataset(group: GroupStorageType) -> abc.CSRDataset | abc.CSCDataset:
"""Generates a backed mode-compatible sparse dataset class.
Parameters
@@ -620,7 +611,8 @@ def sparse_dataset(group: GroupStorageType) -> CSRDataset | CSCDataset:
>>> import scanpy as sc
>>> import h5py
- >>> from anndata.experimental import sparse_dataset, read_elem
+ >>> from anndata.io import sparse_dataset
+ >>> from anndata.io import read_elem
>>> sc.datasets.pbmc68k_reduced().raw.to_adata().write_h5ad("pbmc.h5ad")
Initialize a sparse dataset from storage
@@ -653,38 +645,12 @@ def sparse_dataset(group: GroupStorageType) -> CSRDataset | CSCDataset:
"""
encoding_type = _get_group_format(group)
if encoding_type == "csr":
- return CSRDataset(group)
+ return _CSRDataset(group)
elif encoding_type == "csc":
- return CSCDataset(group)
+ return _CSCDataset(group)
+ raise ValueError(f"Unknown encoding type {encoding_type}")
@_subset.register(BaseCompressedSparseDataset)
def subset_sparsedataset(d, subset_idx):
return d[subset_idx]
-
-
-## Backwards compat
-
-_sparsedataset_depr_msg = """\
-SparseDataset is deprecated and will be removed in late 2024. It has been replaced by the public classes CSRDataset and CSCDataset.
-
-For instance checks, use `isinstance(X, (anndata.experimental.CSRDataset, anndata.experimental.CSCDataset))` instead.
-
-For creation, use `anndata.experimental.sparse_dataset(X)` instead.
-"""
-
-
-class SparseDataset(ABC):
- """DEPRECATED.
-
- Use CSRDataset, CSCDataset, and sparse_dataset from anndata.experimental instead.
- """
-
- def __new__(cls, group):
- warnings.warn(FutureWarning(_sparsedataset_depr_msg), stacklevel=2)
- return sparse_dataset(group)
-
- @classmethod
- def __subclasshook__(cls, C):
- warnings.warn(FutureWarning(_sparsedataset_depr_msg), stacklevel=3)
- return issubclass(C, (CSRDataset, CSCDataset))
diff --git a/src/anndata/_core/storage.py b/src/anndata/_core/storage.py
index 75e7b4ecf..9e036ba44 100644
--- a/src/anndata/_core/storage.py
+++ b/src/anndata/_core/storage.py
@@ -1,74 +1,23 @@
from __future__ import annotations
import warnings
-from enum import Enum
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, get_args
import numpy as np
import pandas as pd
-from numpy import ma
from scipy import sparse
from .._warnings import ImplicitModificationWarning
-from ..compat import (
- AwkArray,
- CupyArray,
- CupySparseMatrix,
- DaskArray,
- H5Array,
- SpArray,
- ZappyArray,
- ZarrArray,
-)
from ..utils import (
ensure_df_homogeneous,
join_english,
raise_value_error_if_multiindex_columns,
)
-from .sparse_dataset import BaseCompressedSparseDataset
if TYPE_CHECKING:
- from collections.abc import Generator
from typing import Any
-class ArrayDataStructureType(Enum):
- # Memory
- Array = (np.ndarray, "np.ndarray")
- Masked = (ma.MaskedArray, "numpy.ma.core.MaskedArray")
- Sparse = (sparse.spmatrix, "scipy.sparse.spmatrix")
- SparseArray = (SpArray, "scipy.sparse.sparray")
- AwkArray = (AwkArray, "awkward.Array")
- # Backed
- HDF5Dataset = (H5Array, "h5py.Dataset")
- ZarrArray = (ZarrArray, "zarr.Array")
- ZappyArray = (ZappyArray, "zappy.base.ZappyArray")
- BackedSparseMatrix = (
- BaseCompressedSparseDataset,
- "anndata.experimental.[CSC,CSR]Dataset",
- )
- # Distributed
- DaskArray = (DaskArray, "dask.array.Array")
- CupyArray = (CupyArray, "cupy.ndarray")
- CupySparseMatrix = (CupySparseMatrix, "cupyx.scipy.sparse.spmatrix")
-
- @property
- def cls(self):
- return self.value[0]
-
- @property
- def qualname(self):
- return self.value[1]
-
- @classmethod
- def classes(cls) -> tuple[type, ...]:
- return tuple(v.cls for v in cls)
-
- @classmethod
- def qualnames(cls) -> Generator[str, None, None]:
- yield from (v.qualname for v in cls)
-
-
def coerce_array(
value: Any,
*,
@@ -77,16 +26,27 @@ def coerce_array(
allow_array_like: bool = False,
):
"""Coerce arrays stored in layers/X, and aligned arrays ({obs,var}{m,p})."""
+ from ..typing import ArrayDataStructureType
+
# If value is a scalar and we allow that, return it
if allow_array_like and np.isscalar(value):
return value
# If value is one of the allowed types, return it
- if isinstance(value, ArrayDataStructureType.classes()):
+ array_data_structure_types = get_args(ArrayDataStructureType)
+ if isinstance(value, array_data_structure_types):
if isinstance(value, np.matrix):
msg = f"{name} should not be a np.matrix, use np.ndarray instead."
warnings.warn(msg, ImplicitModificationWarning)
value = value.A
return value
+ elif isinstance(value, sparse.spmatrix):
+ msg = (
+ f"AnnData previously had undefined behavior around matrices of type {type(value)}."
+ "In 0.12, passing in this type will throw an error. Please convert to a supported type."
+ "Continue using for this minor version at your own risk."
+ )
+ warnings.warn(msg, FutureWarning)
+ return value
if isinstance(value, pd.DataFrame):
if allow_df:
raise_value_error_if_multiindex_columns(value, name)
@@ -100,7 +60,7 @@ def coerce_array(
except (ValueError, TypeError) as _e:
e = _e
# if value isn’t the right type or convertible, raise an error
- msg = f"{name} needs to be of one of {join_english(ArrayDataStructureType.qualnames())}, not {type(value)}."
+ msg = f"{name} needs to be of one of {join_english(map(str, array_data_structure_types))}, not {type(value)}."
if e is not None:
msg += " (Failed to convert it to an array, see above for details.)"
raise ValueError(msg) from e
diff --git a/src/anndata/_io/__init__.py b/src/anndata/_io/__init__.py
index 9315d3369..8fbd55df3 100644
--- a/src/anndata/_io/__init__.py
+++ b/src/anndata/_io/__init__.py
@@ -1,40 +1,17 @@
from __future__ import annotations
-from .h5ad import read_h5ad, write_h5ad
-from .read import (
- read_csv,
- read_excel,
- read_hdf,
- read_loom,
- read_mtx,
- read_text,
- read_umi_tools,
- read_zarr,
-)
-from .write import write_csvs, write_loom
+import warnings
+__all__: list[str] = []
-def write_zarr(*args, **kw):
- from .zarr import write_zarr
- return write_zarr(*args, **kw)
+def __getattr__(key: str):
+ from .. import io
-
-# We use this in test by attribute access
-from . import specs # noqa: F401, E402
-
-__all__ = [
- "read_csv",
- "read_excel",
- "read_h5ad",
- "read_hdf",
- "read_loom",
- "read_mtx",
- "read_text",
- "read_umi_tools",
- "read_zarr",
- "write_csvs",
- "write_h5ad",
- "write_loom",
- "write_zarr",
-]
+ attr = getattr(io, key)
+ warnings.warn(
+ f"Importing {key} from `anndata._io` is deprecated. "
+ "Please use anndata.io instead.",
+ FutureWarning,
+ )
+ return attr
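+
+
+# Illustrative (sketch): `from anndata._io import read_h5ad` keeps working via the
+# PEP 562 __getattr__ shim above, but emits a FutureWarning; new code should use
+# `from anndata.io import read_h5ad` instead.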
diff --git a/src/anndata/_io/h5ad.py b/src/anndata/_io/h5ad.py
index 36429403d..edf4977cc 100644
--- a/src/anndata/_io/h5ad.py
+++ b/src/anndata/_io/h5ad.py
@@ -82,14 +82,14 @@ def write_h5ad(
f.attrs.setdefault("encoding-version", "0.1.0")
if "X" in as_dense and isinstance(
- adata.X, (sparse.spmatrix, BaseCompressedSparseDataset)
+ adata.X, sparse.spmatrix | BaseCompressedSparseDataset
):
write_sparse_as_dense(f, "X", adata.X, dataset_kwargs=dataset_kwargs)
elif not (adata.isbacked and Path(adata.filename) == Path(filepath)):
# If adata.isbacked, X should already be up to date
write_elem(f, "X", adata.X, dataset_kwargs=dataset_kwargs)
if "raw/X" in as_dense and isinstance(
- adata.raw.X, (sparse.spmatrix, BaseCompressedSparseDataset)
+ adata.raw.X, sparse.spmatrix | BaseCompressedSparseDataset
):
write_sparse_as_dense(
f, "raw/X", adata.raw.X, dataset_kwargs=dataset_kwargs
diff --git a/src/anndata/_io/read.py b/src/anndata/_io/read.py
index a50c4b2ef..f22cff351 100644
--- a/src/anndata/_io/read.py
+++ b/src/anndata/_io/read.py
@@ -21,14 +21,6 @@
if TYPE_CHECKING:
from collections.abc import Generator, Iterable, Iterator, Mapping
-try:
- from .zarr import read_zarr
-except ImportError as _e:
- e = _e
-
- def read_zarr(*_, **__):
- raise e
-
def read_csv(
filename: PathLike | Iterator[str],
@@ -39,7 +31,7 @@ def read_csv(
"""\
Read `.csv` file.
- Same as :func:`~anndata.read_text` but with default delimiter `','`.
+ Same as :func:`~anndata.io.read_text` but with default delimiter `','`.
Parameters
----------
@@ -208,7 +200,7 @@ def read_loom(
.. code:: python
- pbmc = anndata.read_loom(
+ pbmc = anndata.io.read_loom(
"pbmc.loom",
sparse=True,
X_name="lognorm",
@@ -330,7 +322,7 @@ def read_text(
"""\
Read `.txt`, `.tab`, `.data` (text) file.
- Same as :func:`~anndata.read_csv` but with default delimiter `None`.
+ Same as :func:`~anndata.io.read_csv` but with default delimiter `None`.
Parameters
----------
@@ -345,7 +337,7 @@ def read_text(
dtype
Numpy data type.
"""
- if not isinstance(filename, (PathLike, str, bytes)):
+ if not isinstance(filename, PathLike | str | bytes):
return _read_text(filename, delimiter, first_column_names, dtype)
filename = Path(filename)
diff --git a/src/anndata/_io/specs/lazy_methods.py b/src/anndata/_io/specs/lazy_methods.py
index 8a1b31e6b..a34f627e7 100644
--- a/src/anndata/_io/specs/lazy_methods.py
+++ b/src/anndata/_io/specs/lazy_methods.py
@@ -19,7 +19,7 @@
from collections.abc import Callable, Generator, Mapping, Sequence
from typing import Literal, ParamSpec, TypeVar
- from ..._core.sparse_dataset import CSCDataset, CSRDataset
+ from ..._core.sparse_dataset import _CSCDataset, _CSRDataset
from ..._types import ArrayStorageType, StorageType
from ...compat import DaskArray
from .registry import DaskReader
@@ -66,7 +66,7 @@ def make_dask_chunk(
block_info: BlockInfo | None = None,
*,
wrap: Callable[[ArrayStorageType], ArrayStorageType]
- | Callable[[H5Group | ZarrGroup], CSRDataset | CSCDataset] = lambda g: g,
+ | Callable[[H5Group | ZarrGroup], _CSRDataset | _CSCDataset] = lambda g: g,
):
if block_info is None:
msg = "Block info is required"
@@ -105,12 +105,16 @@ def read_sparse_as_dask(
if chunks is not None:
if len(chunks) != 2:
raise ValueError("`chunks` must be a tuple of two integers")
- if chunks[minor_dim] != shape[minor_dim]:
+ if chunks[minor_dim] not in {shape[minor_dim], -1, None}:
raise ValueError(
"Only the major axis can be chunked. "
f"Try setting chunks to {((-1, _DEFAULT_STRIDE) if is_csc else (_DEFAULT_STRIDE, -1))}"
)
- stride = chunks[major_dim]
+ stride = (
+ chunks[major_dim]
+ if chunks[major_dim] not in {None, -1}
+ else shape[major_dim]
+ )
shape_minor, shape_major = shape if is_csc else shape[::-1]
chunks_major = compute_chunk_layout_for_axis_shape(stride, shape_major)
@@ -120,7 +124,7 @@ def read_sparse_as_dask(
)
memory_format = sparse.csc_matrix if is_csc else sparse.csr_matrix
make_chunk = partial(
- make_dask_chunk, path_or_group, elem_name, wrap=ad.experimental.sparse_dataset
+ make_dask_chunk, path_or_group, elem_name, wrap=ad.io.sparse_dataset
)
da_mtx = da.map_blocks(
make_chunk,
@@ -142,7 +146,11 @@ def read_h5_array(
shape = tuple(elem.shape)
dtype = elem.dtype
chunks: tuple[int, ...] = (
- chunks if chunks is not None else (_DEFAULT_STRIDE,) * len(shape)
+ tuple(
+ c if c not in {None, -1} else s for c, s in zip(chunks, shape, strict=True)
+ )
+ if chunks is not None
+ else (_DEFAULT_STRIDE,) * len(shape)
)
chunk_layout = tuple(
diff --git a/src/anndata/_io/specs/methods.py b/src/anndata/_io/specs/methods.py
index f6916cba1..19cd1f66f 100644
--- a/src/anndata/_io/specs/methods.py
+++ b/src/anndata/_io/specs/methods.py
@@ -1,6 +1,8 @@
from __future__ import annotations
+import warnings
from collections.abc import Mapping
+from copy import copy
from functools import partial
from itertools import product
from types import MappingProxyType
@@ -10,6 +12,7 @@
import h5py
import numpy as np
import pandas as pd
+from packaging.version import Version
from scipy import sparse
import anndata as ad
@@ -17,7 +20,7 @@
from anndata._core import views
from anndata._core.index import _normalize_indices
from anndata._core.merge import intersect_keys
-from anndata._core.sparse_dataset import CSCDataset, CSRDataset, sparse_dataset
+from anndata._core.sparse_dataset import _CSCDataset, _CSRDataset, sparse_dataset
from anndata._io.utils import H5PY_V3, check_key
from anndata._warnings import OldFormatWarning
from anndata.compat import (
@@ -37,21 +40,20 @@
_require_group_write_dataframe,
)
+from ..._settings import settings
from .registry import _REGISTRY, IOSpec, read_elem, read_elem_partial
if TYPE_CHECKING:
+ from collections.abc import Callable
from os import PathLike
from typing import Any, Literal
from numpy import typing as npt
+ from numpy.typing import NDArray
- from anndata._types import (
- ArrayStorageType,
- GroupStorageType,
- InMemoryArrayOrScalarType,
- RWAble,
- )
+ from anndata._types import ArrayStorageType, GroupStorageType
from anndata.compat import SpArray
+ from anndata.typing import AxisStorable, InMemoryArrayOrScalarType
from .registry import Reader, Writer
@@ -332,7 +334,7 @@ def write_raw(
@_REGISTRY.register_read(H5Group, IOSpec("dict", "0.1.0"))
@_REGISTRY.register_read(ZarrGroup, IOSpec("dict", "0.1.0"))
-def read_mapping(elem: GroupStorageType, *, _reader: Reader) -> dict[str, RWAble]:
+def read_mapping(elem: GroupStorageType, *, _reader: Reader) -> dict[str, AxisStorable]:
return {k: _reader.read_elem(v) for k, v in elem.items()}
@@ -341,7 +343,7 @@ def read_mapping(elem: GroupStorageType, *, _reader: Reader) -> dict[str, RWAble
def write_mapping(
f: GroupStorageType,
k: str,
- v: dict[str, RWAble],
+ v: dict[str, AxisStorable],
*,
_writer: Writer,
dataset_kwargs: Mapping[str, Any] = MappingProxyType({}),
@@ -361,7 +363,7 @@ def write_mapping(
def write_list(
f: GroupStorageType,
k: str,
- elem: list[RWAble],
+ elem: list[AxisStorable],
*,
_writer: Writer,
dataset_kwargs: Mapping[str, Any] = MappingProxyType({}),
@@ -548,10 +550,12 @@ def write_vlen_string_array_zarr(
):
import numcodecs
- # Workaround for https://github.com/zarr-developers/numcodecs/issues/514
- # TODO: Warn to upgrade numcodecs if fixed
- if not elem.flags.writeable:
- elem = elem.copy()
+ if Version(numcodecs.__version__) < Version("0.13"):
+ msg = "Old numcodecs version detected. Please update for improved performance and stability."
+ warnings.warn(msg)
+ # Workaround for https://github.com/zarr-developers/numcodecs/issues/514
+ if hasattr(elem, "flags") and not elem.flags.writeable:
+ elem = elem.copy()
f.create_dataset(
k,
@@ -684,14 +688,14 @@ def write_sparse_compressed(
_REGISTRY.register_write(store_type, cls, spec)(func)
-@_REGISTRY.register_write(H5Group, CSRDataset, IOSpec("", "0.1.0"))
-@_REGISTRY.register_write(H5Group, CSCDataset, IOSpec("", "0.1.0"))
-@_REGISTRY.register_write(ZarrGroup, CSRDataset, IOSpec("", "0.1.0"))
-@_REGISTRY.register_write(ZarrGroup, CSCDataset, IOSpec("", "0.1.0"))
+@_REGISTRY.register_write(H5Group, _CSRDataset, IOSpec("", "0.1.0"))
+@_REGISTRY.register_write(H5Group, _CSCDataset, IOSpec("", "0.1.0"))
+@_REGISTRY.register_write(ZarrGroup, _CSRDataset, IOSpec("", "0.1.0"))
+@_REGISTRY.register_write(ZarrGroup, _CSCDataset, IOSpec("", "0.1.0"))
def write_sparse_dataset(
f: GroupStorageType,
k: str,
- elem: CSCDataset | CSRDataset,
+ elem: _CSCDataset | _CSRDataset,
*,
_writer: Writer,
dataset_kwargs: Mapping[str, Any] = MappingProxyType({}),
@@ -835,6 +839,9 @@ def write_awkward(
from anndata.compat import awkward as ak
group = f.require_group(k)
+ if isinstance(v, views.AwkwardArrayView):
+ # copy to remove the view attributes
+ v = copy(v)
form, length, container = ak.to_buffers(ak.to_packed(v))
group.attrs["length"] = length
group.attrs["form"] = form.to_json()
@@ -1060,44 +1067,85 @@ def read_partial_categorical(elem, *, items=None, indices=(slice(None),)):
@_REGISTRY.register_write(
ZarrGroup, pd.arrays.BooleanArray, IOSpec("nullable-boolean", "0.1.0")
)
-def write_nullable_integer(
+@_REGISTRY.register_write(
+ H5Group, pd.arrays.StringArray, IOSpec("nullable-string-array", "0.1.0")
+)
+@_REGISTRY.register_write(
+ ZarrGroup, pd.arrays.StringArray, IOSpec("nullable-string-array", "0.1.0")
+)
+def write_nullable(
f: GroupStorageType,
k: str,
- v: pd.arrays.IntegerArray | pd.arrays.BooleanArray,
+ v: pd.arrays.IntegerArray | pd.arrays.BooleanArray | pd.arrays.StringArray,
*,
_writer: Writer,
dataset_kwargs: Mapping[str, Any] = MappingProxyType({}),
):
+ if (
+ isinstance(v, pd.arrays.StringArray)
+ and not settings.allow_write_nullable_strings
+ ):
+ msg = (
+ "`anndata.settings.allow_write_nullable_strings` is False, "
+ "because writing of `pd.arrays.StringArray` is new "
+ "and not supported in anndata < 0.11, still use by many people. "
+ "Opt-in to writing these arrays by toggling the setting to True."
+ )
+ raise RuntimeError(msg)
g = f.require_group(k)
- if v._mask is not None:
- _writer.write_elem(g, "mask", v._mask, dataset_kwargs=dataset_kwargs)
- _writer.write_elem(g, "values", v._data, dataset_kwargs=dataset_kwargs)
+ values = (
+ v.to_numpy(na_value="")
+ if isinstance(v, pd.arrays.StringArray)
+ else v.to_numpy(na_value=0, dtype=v.dtype.numpy_dtype)
+ )
+ _writer.write_elem(g, "values", values, dataset_kwargs=dataset_kwargs)
+ _writer.write_elem(g, "mask", v.isna(), dataset_kwargs=dataset_kwargs)
-@_REGISTRY.register_read(H5Group, IOSpec("nullable-integer", "0.1.0"))
-@_REGISTRY.register_read(ZarrGroup, IOSpec("nullable-integer", "0.1.0"))
-def read_nullable_integer(
- elem: GroupStorageType, *, _reader: Reader
+def _read_nullable(
+ elem: GroupStorageType,
+ *,
+ _reader: Reader,
+ # BaseMaskedArray
+ array_type: Callable[
+ [NDArray[np.number], NDArray[np.bool_]], pd.api.extensions.ExtensionArray
+ ],
) -> pd.api.extensions.ExtensionArray:
- if "mask" in elem:
- return pd.arrays.IntegerArray(
- _reader.read_elem(elem["values"]), mask=_reader.read_elem(elem["mask"])
- )
- else:
- return pd.array(_reader.read_elem(elem["values"]))
+ return array_type(
+ _reader.read_elem(elem["values"]),
+ mask=_reader.read_elem(elem["mask"]),
+ )
-@_REGISTRY.register_read(H5Group, IOSpec("nullable-boolean", "0.1.0"))
-@_REGISTRY.register_read(ZarrGroup, IOSpec("nullable-boolean", "0.1.0"))
-def read_nullable_boolean(
- elem: GroupStorageType, *, _reader: Reader
+def _string_array(
+ values: np.ndarray, mask: np.ndarray
) -> pd.api.extensions.ExtensionArray:
- if "mask" in elem:
- return pd.arrays.BooleanArray(
- _reader.read_elem(elem["values"]), mask=_reader.read_elem(elem["mask"])
- )
- else:
- return pd.array(_reader.read_elem(elem["values"]))
+ """Construct a string array from values and mask."""
+ arr = pd.array(values, dtype="string")
+ arr[mask] = pd.NA
+ return arr
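+
+
+# Illustrative behavior of `_string_array` (a sketch, not a doctest):
+#   _string_array(np.array(["a", ""]), np.array([False, True]))
+#   -> pandas StringArray equal to ["a", <NA>]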
+
+
+_REGISTRY.register_read(H5Group, IOSpec("nullable-integer", "0.1.0"))(
+ read_nullable_integer := partial(_read_nullable, array_type=pd.arrays.IntegerArray)
+)
+_REGISTRY.register_read(ZarrGroup, IOSpec("nullable-integer", "0.1.0"))(
+ read_nullable_integer
+)
+
+_REGISTRY.register_read(H5Group, IOSpec("nullable-boolean", "0.1.0"))(
+ read_nullable_boolean := partial(_read_nullable, array_type=pd.arrays.BooleanArray)
+)
+_REGISTRY.register_read(ZarrGroup, IOSpec("nullable-boolean", "0.1.0"))(
+ read_nullable_boolean
+)
+
+_REGISTRY.register_read(H5Group, IOSpec("nullable-string-array", "0.1.0"))(
+ read_nullable_string := partial(_read_nullable, array_type=_string_array)
+)
+_REGISTRY.register_read(ZarrGroup, IOSpec("nullable-string-array", "0.1.0"))(
+ read_nullable_string
+)
###########
@@ -1137,17 +1185,19 @@ def write_hdf5_scalar(
f.create_dataset(key, data=np.array(value), **dataset_kwargs)
-# fmt: off
for numeric_scalar_type in [
- bool, np.bool_,
- np.uint8, np.uint16, np.uint32, np.uint64,
- int, np.int8, np.int16, np.int32, np.int64,
- float, *np.floating.__subclasses__(),
+ *(bool, np.bool_),
+ *(np.uint8, np.uint16, np.uint32, np.uint64),
+ *(int, np.int8, np.int16, np.int32, np.int64),
+ *(float, *np.floating.__subclasses__()),
*np.complexfloating.__subclasses__(),
]:
- _REGISTRY.register_write(H5Group, numeric_scalar_type, IOSpec("numeric-scalar", "0.2.0"))(write_hdf5_scalar)
- _REGISTRY.register_write(ZarrGroup, numeric_scalar_type, IOSpec("numeric-scalar", "0.2.0"))(write_scalar)
-# fmt: on
+ _REGISTRY.register_write(
+ H5Group, numeric_scalar_type, IOSpec("numeric-scalar", "0.2.0")
+ )(write_hdf5_scalar)
+ _REGISTRY.register_write(
+ ZarrGroup, numeric_scalar_type, IOSpec("numeric-scalar", "0.2.0")
+ )(write_scalar)
_REGISTRY.register_write(ZarrGroup, str, IOSpec("string", "0.2.0"))(write_scalar)
_REGISTRY.register_write(ZarrGroup, np.str_, IOSpec("string", "0.2.0"))(write_scalar)
diff --git a/src/anndata/_io/specs/registry.py b/src/anndata/_io/specs/registry.py
index 9a46ba9aa..ca13f8e59 100644
--- a/src/anndata/_io/specs/registry.py
+++ b/src/anndata/_io/specs/registry.py
@@ -17,13 +17,13 @@
from anndata._types import (
GroupStorageType,
- InMemoryElem,
ReadCallback,
StorageType,
Write,
WriteCallback,
_WriteInternal,
)
+ from anndata.typing import RWAble
T = TypeVar("T")
W = TypeVar("W", bound=_WriteInternal)
@@ -270,7 +270,7 @@ def read_elem(
self,
elem: StorageType,
modifiers: frozenset[str] = frozenset(),
- ) -> InMemoryElem:
+ ) -> RWAble:
"""Read an element from a store. See exported function for more details."""
iospec = get_spec(elem)
@@ -323,7 +323,7 @@ def write_elem(
self,
store: GroupStorageType,
k: str,
- elem: InMemoryElem,
+ elem: RWAble,
*,
dataset_kwargs: Mapping[str, Any] = MappingProxyType({}),
modifiers: frozenset[str] = frozenset(),
@@ -363,7 +363,7 @@ def write_elem(
)
-def read_elem(elem: StorageType) -> InMemoryElem:
+def read_elem(elem: StorageType) -> RWAble:
"""
Read an element from a store.
@@ -395,10 +395,68 @@ def read_elem_as_dask(
chunks, optional
length `n`, the same `n` as the size of the underlying array.
Note that the minor axis dimension must match the shape for sparse.
+ Defaults to `(1000, adata.shape[1])` for CSR sparse,
+ `(adata.shape[0], 1000)` for CSC sparse,
+        and the on-disk chunking for dense arrays.
+        Use `-1` or `None` to request the full size of the corresponding dimension.
Returns
-------
DaskArray
+
+ Examples
+ --------
+
+ Setting up our example:
+
+ >>> from scanpy.datasets import pbmc3k
+ >>> import tempfile
+ >>> import anndata as ad
+ >>> import zarr
+
+ >>> tmp_path = tempfile.gettempdir()
+ >>> zarr_path = tmp_path + "/adata.zarr"
+
+ >>> adata = pbmc3k()
+ >>> adata.layers["dense"] = adata.X.toarray()
+ >>> adata.write_zarr(zarr_path)
+
+ Reading a sparse matrix from a zarr store lazily, with custom chunk size and default:
+
+ >>> g = zarr.open(zarr_path)
+ >>> adata.X = ad.experimental.read_elem_as_dask(g["X"])
+ >>> adata.X
+ dask.array
+ >>> adata.X = ad.experimental.read_elem_as_dask(
+ ... g["X"], chunks=(500, adata.shape[1])
+ ... )
+ >>> adata.X
+ dask.array
+
+ Reading a dense matrix from a zarr store lazily:
+
+ >>> adata.layers["dense"] = ad.experimental.read_elem_as_dask(g["layers/dense"])
+ >>> adata.layers["dense"]
+ dask.array
+
+    Making a new anndata object from on-disk data, with custom chunks:
+
+ >>> adata = ad.AnnData(
+ ... obs=ad.io.read_elem(g["obs"]),
+ ... var=ad.io.read_elem(g["var"]),
+ ... uns=ad.io.read_elem(g["uns"]),
+ ... obsm=ad.io.read_elem(g["obsm"]),
+ ... varm=ad.io.read_elem(g["varm"]),
+ ... )
+ >>> adata.X = ad.experimental.read_elem_as_dask(
+ ... g["X"], chunks=(500, adata.shape[1])
+ ... )
+ >>> adata.layers["dense"] = ad.experimental.read_elem_as_dask(g["layers/dense"])
+
+    We also support using `-1` and `None` as a chunk size to signify reading the whole axis:
+
+ >>> adata.X = ad.experimental.read_elem_as_dask(g["X"], chunks=(500, -1))
+ >>> adata.X = ad.experimental.read_elem_as_dask(g["X"], chunks=(500, None))
"""
return DaskReader(_LAZY_REGISTRY).read_elem(elem, chunks=chunks)
@@ -406,7 +464,7 @@ def read_elem_as_dask(
def write_elem(
store: GroupStorageType,
k: str,
- elem: InMemoryElem,
+ elem: RWAble,
*,
dataset_kwargs: Mapping[str, Any] = MappingProxyType({}),
) -> None:
diff --git a/src/anndata/_io/utils.py b/src/anndata/_io/utils.py
index ee7aa23d0..f8bdb01c7 100644
--- a/src/anndata/_io/utils.py
+++ b/src/anndata/_io/utils.py
@@ -1,6 +1,7 @@
from __future__ import annotations
from functools import wraps
+from itertools import pairwise
from typing import TYPE_CHECKING, cast
from warnings import warn
@@ -8,16 +9,16 @@
from packaging.version import Version
from .._core.sparse_dataset import BaseCompressedSparseDataset
-from ..compat import add_note, pairwise
+from ..compat import add_note
if TYPE_CHECKING:
from collections.abc import Callable
- from typing import Literal, Union
+ from typing import Literal
from .._types import StorageType
from ..compat import H5Group, ZarrGroup
- Storage = Union[StorageType, BaseCompressedSparseDataset]
+ Storage = StorageType | BaseCompressedSparseDataset
# For allowing h5py v3
# https://github.com/scverse/anndata/issues/442
diff --git a/src/anndata/_io/zarr.py b/src/anndata/_io/zarr.py
index 4cab3ea8d..2564738ad 100644
--- a/src/anndata/_io/zarr.py
+++ b/src/anndata/_io/zarr.py
@@ -103,6 +103,7 @@ def callback(func, elem_name: str, elem, iospec):
@report_read_key_on_error
def read_dataset(dataset: zarr.Array):
+ """Legacy method for reading datasets without encoding_type."""
value = dataset[...]
if not hasattr(value, "dtype"):
return value
diff --git a/src/anndata/_settings.py b/src/anndata/_settings.py
index 285415a74..f67633c08 100644
--- a/src/anndata/_settings.py
+++ b/src/anndata/_settings.py
@@ -14,6 +14,7 @@
from types import GenericAlias
from typing import TYPE_CHECKING, Generic, NamedTuple, TypeVar, cast
+from anndata.compat import CAN_USE_SPARSE_ARRAY
from anndata.compat.exceptiongroups import add_note
if TYPE_CHECKING:
@@ -396,36 +397,54 @@ def __doc__(self):
##################################################################################
-categories_option = "should_remove_unused_categories"
-categories_default_value = True
-categories_description = (
- "Whether or not to remove unused categories with :class:`~pandas.Categorical`."
-)
-
-uniqueness_option = "should_check_uniqueness"
-uniqueness_default_value = True
-uniqueness_description = "Whether or not to check uniqueness of the `obs` indices on `__init__` of :class:`~anndata.AnnData`."
-
-
-def validate_bool(val) -> None:
+def validate_bool(val: Any) -> None:
if not isinstance(val, bool):
msg = f"{val} not valid boolean"
raise TypeError(msg)
settings.register(
- categories_option,
- categories_default_value,
- categories_description,
- validate_bool,
+ "remove_unused_categories",
+ default_value=True,
+ description="Whether or not to remove unused categories with :class:`~pandas.Categorical`.",
+ validate=validate_bool,
get_from_env=check_and_get_bool,
)
settings.register(
- uniqueness_option,
- uniqueness_default_value,
- uniqueness_description,
- validate_bool,
+ "check_uniqueness",
+ default_value=True,
+ description=(
+ "Whether or not to check uniqueness of the `obs` indices on `__init__` of :class:`~anndata.AnnData`."
+ ),
+ validate=validate_bool,
+ get_from_env=check_and_get_bool,
+)
+
+settings.register(
+ "allow_write_nullable_strings",
+ default_value=False,
+ description="Whether or not to allow writing of `pd.arrays.StringArray`.",
+ validate=validate_bool,
+ get_from_env=check_and_get_bool,
+)
+
+
+def validate_sparse_settings(val: Any) -> None:
+ validate_bool(val)
+ if not CAN_USE_SPARSE_ARRAY and cast(bool, val):
+        msg = (
+            "scipy.sparse.cs{r,c}array is not available in the current scipy version, "
+            "so `use_sparse_array_on_read` cannot be enabled; "
+            "scipy.sparse.cs{r,c}_matrix will continue to be used for reading."
+        )
+ raise ValueError(msg)
+
+
+settings.register(
+ "use_sparse_array_on_read",
+ default_value=False,
+ description="Whether or not to use :class:`scipy.sparse.sparray` as the default class when reading in data",
+ validate=validate_sparse_settings,
get_from_env=check_and_get_bool,
)
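+
+# Example usage (sketch): registered options are toggled as attributes at runtime, e.g.
+#
+#     import anndata as ad
+#     ad.settings.use_sparse_array_on_read = True       # read sparse data as csr_array/csc_array
+#     ad.settings.allow_write_nullable_strings = True   # opt in to writing pd.arrays.StringArray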
diff --git a/src/anndata/_types.py b/src/anndata/_types.py
index 3549152f5..2d9eb9980 100644
--- a/src/anndata/_types.py
+++ b/src/anndata/_types.py
@@ -4,89 +4,47 @@
from __future__ import annotations
-from typing import TYPE_CHECKING, Protocol, TypeVar, Union
+from typing import TYPE_CHECKING, Protocol, TypeVar
-import numpy as np
-import pandas as pd
-from numpy.typing import NDArray
-from scipy import sparse
-
-from anndata._core.anndata import AnnData
-
-from ._core.sparse_dataset import BaseCompressedSparseDataset
from .compat import (
- AwkArray,
- CupyArray,
- CupySparseMatrix,
- DaskArray,
H5Array,
H5Group,
- SpArray,
- ZappyArray,
ZarrArray,
ZarrGroup,
)
+from .typing import RWAble
if TYPE_CHECKING:
from collections.abc import Mapping
from typing import Any, TypeAlias
- from anndata._io.specs.registry import DaskReader
-
- from ._io.specs.registry import IOSpec, Reader, Writer
+ from ._io.specs.registry import DaskReader, IOSpec, Reader, Writer
+ from .compat import DaskArray
__all__ = [
"ArrayStorageType",
"GroupStorageType",
"StorageType",
+ "_ReadInternal",
+ "_ReadDaskInternal",
+ "_WriteInternal",
]
-InMemoryArrayOrScalarType: TypeAlias = Union[
- NDArray,
- np.ma.MaskedArray,
- sparse.spmatrix,
- SpArray,
- H5Array,
- ZarrArray,
- ZappyArray,
- BaseCompressedSparseDataset,
- DaskArray,
- CupyArray,
- CupySparseMatrix,
- AwkArray,
- pd.DataFrame,
- np.number,
- str,
-]
-RWAble: TypeAlias = Union[
- InMemoryArrayOrScalarType, dict[str, "RWAble"], list["RWAble"]
-] # noqa: TCH010
-InMemoryElem: TypeAlias = Union[
- RWAble,
- AnnData,
- pd.Categorical,
- pd.api.extensions.ExtensionArray,
-]
-
-ArrayStorageType: TypeAlias = Union[ZarrArray, H5Array]
-GroupStorageType: TypeAlias = Union[ZarrGroup, H5Group]
-StorageType: TypeAlias = Union[ArrayStorageType, GroupStorageType]
+ArrayStorageType: TypeAlias = ZarrArray | H5Array
+GroupStorageType: TypeAlias = ZarrGroup | H5Group
+StorageType: TypeAlias = ArrayStorageType | GroupStorageType
# NOTE: If you change these, be sure to update `autodoc_type_aliases` in docs/conf.py!
-ContravariantInMemoryType = TypeVar(
- "ContravariantInMemoryType", bound="InMemoryElem", contravariant=True
-)
-CovariantInMemoryType = TypeVar(
- "CovariantInMemoryType", bound="InMemoryElem", covariant=True
-)
-InvariantInMemoryType = TypeVar("InvariantInMemoryType", bound="InMemoryElem")
+ContravariantRWAble = TypeVar("ContravariantRWAble", bound=RWAble, contravariant=True)
+CovariantRWAble = TypeVar("CovariantRWAble", bound=RWAble, covariant=True)
+InvariantRWAble = TypeVar("InvariantRWAble", bound=RWAble)
SCo = TypeVar("SCo", covariant=True, bound=StorageType)
SCon = TypeVar("SCon", contravariant=True, bound=StorageType)
-class _ReadInternal(Protocol[SCon, CovariantInMemoryType]):
- def __call__(self, elem: SCon, *, _reader: Reader) -> CovariantInMemoryType: ...
+class _ReadInternal(Protocol[SCon, CovariantRWAble]):
+ def __call__(self, elem: SCon, *, _reader: Reader) -> CovariantRWAble: ...
class _ReadDaskInternal(Protocol[SCon]):
@@ -95,8 +53,8 @@ def __call__(
) -> DaskArray: ...
-class Read(Protocol[SCon, CovariantInMemoryType]):
- def __call__(self, elem: SCon) -> CovariantInMemoryType:
+class Read(Protocol[SCon, CovariantRWAble]):
+ def __call__(self, elem: SCon) -> CovariantRWAble:
"""Low-level reading function for an element.
Parameters
@@ -129,24 +87,24 @@ def __call__(
...
-class _WriteInternal(Protocol[ContravariantInMemoryType]):
+class _WriteInternal(Protocol[ContravariantRWAble]):
def __call__(
self,
f: StorageType,
k: str,
- v: ContravariantInMemoryType,
+ v: ContravariantRWAble,
*,
_writer: Writer,
dataset_kwargs: Mapping[str, Any],
) -> None: ...
-class Write(Protocol[ContravariantInMemoryType]):
+class Write(Protocol[ContravariantRWAble]):
def __call__(
self,
f: StorageType,
k: str,
- v: ContravariantInMemoryType,
+ v: ContravariantRWAble,
*,
dataset_kwargs: Mapping[str, Any],
) -> None:
@@ -166,23 +124,23 @@ def __call__(
...
-class ReadCallback(Protocol[SCo, InvariantInMemoryType]):
+class ReadCallback(Protocol[SCo, InvariantRWAble]):
def __call__(
self,
/,
- read_func: Read[SCo, InvariantInMemoryType],
+ read_func: Read[SCo, InvariantRWAble],
elem_name: str,
elem: StorageType,
*,
iospec: IOSpec,
- ) -> InvariantInMemoryType:
+ ) -> InvariantRWAble:
"""
Callback used in :func:`anndata.experimental.read_dispatched` to customize reading an element from a store.
Params
------
read_func
- :func:`anndata.experimental.read_elem` function to call to read the current element given the ``iospec``.
+ :func:`anndata.io.read_elem` function to call to read the current element given the ``iospec``.
elem_name
The key to read in from the group.
elem
@@ -197,14 +155,14 @@ def __call__(
...
-class WriteCallback(Protocol[InvariantInMemoryType]):
+class WriteCallback(Protocol[InvariantRWAble]):
def __call__(
self,
/,
- write_func: Write[InvariantInMemoryType],
+ write_func: Write[InvariantRWAble],
store: StorageType,
elem_name: str,
- elem: InvariantInMemoryType,
+ elem: InvariantRWAble,
*,
iospec: IOSpec,
dataset_kwargs: Mapping[str, Any],
@@ -215,7 +173,7 @@ def __call__(
Params
------
write_func
- :func:`anndata.experimental.write_elem` function to call to read the current element given the ``iospec``.
+ :func:`anndata.io.write_elem` function to call to read the current element given the ``iospec``.
store
The store to which `elem` should be written.
elem_name
diff --git a/src/anndata/abc.py b/src/anndata/abc.py
new file mode 100644
index 000000000..df8c8a6e8
--- /dev/null
+++ b/src/anndata/abc.py
@@ -0,0 +1,71 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from typing import ClassVar, Literal
+
+ import numpy as np
+ from scipy.sparse import csc_matrix, csr_matrix
+
+ from .compat import Index, SpArray
+
+
+__all__ = ["CSRDataset", "CSCDataset"]
+
+
+class _AbstractCSDataset(ABC):
+ """Base for the public API for CSRDataset/CSCDataset."""
+
+ format: ClassVar[Literal["csr", "csc"]]
+ """The format of the sparse matrix."""
+
+ shape: tuple[int, int]
+ """Shape of the matrix."""
+
+ dtype: np.dtype
+ """The :class:`numpy.dtype` of the `data` attribute of the sparse matrix."""
+
+ backend: Literal["zarr", "hdf5"]
+ """Which file type is used on-disk."""
+
+ @abstractmethod
+ def __getitem__(self, index: Index) -> float | csr_matrix | csc_matrix | SpArray:
+ """Load a slice or an element from the sparse dataset into memory.
+
+ Parameters
+ ----------
+ index
+ Index to load.
+
+ Returns
+ -------
+ The desired data read off disk.
+ """
+
+ @abstractmethod
+ def to_memory(self) -> csr_matrix | csc_matrix | SpArray:
+ """Load the sparse dataset into memory.
+
+ Returns
+ -------
+ The in-memory representation of the sparse dataset.
+ """
+
+
+_sparse_dataset_doc = """\
+On disk {format} sparse matrix.
+
+Analogous to :class:`h5py.Dataset` or :class:`zarr.core.Array`, but for sparse matrices.
+"""
+
+
+class CSRDataset(_AbstractCSDataset, ABC):
+ __doc__ = _sparse_dataset_doc.format(format="CSR")
+ format = "csr"
+
+
+class CSCDataset(_AbstractCSDataset, ABC):
+ __doc__ = _sparse_dataset_doc.format(format="CSC")
+ format = "csc"
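A brief, hedged sketch of what the new public ABCs enable: `isinstance` checks against `anndata.abc.CSRDataset` without reaching into private modules. This assumes the backed sparse classes are (virtual or real) subclasses of these ABCs; the file path is hypothetical.

```python
# Sketch only: checks a backed X against the new public ABC.
import anndata as ad
from anndata.abc import CSRDataset

adata = ad.read_h5ad("pbmc.h5ad", backed="r")  # hypothetical file
if isinstance(adata.X, CSRDataset):
    X = adata.X.to_memory()  # materialize the on-disk CSR matrix
```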
diff --git a/src/anndata/compat/__init__.py b/src/anndata/compat/__init__.py
index 026f6cb4c..255ffa548 100644
--- a/src/anndata/compat/__init__.py
+++ b/src/anndata/compat/__init__.py
@@ -7,9 +7,11 @@
from contextlib import AbstractContextManager
from dataclasses import dataclass, field
from functools import singledispatch, wraps
+from importlib.util import find_spec
from inspect import Parameter, signature
from pathlib import Path
-from typing import TYPE_CHECKING, TypeVar, Union
+from types import EllipsisType
+from typing import TYPE_CHECKING, TypeVar
from warnings import warn
import h5py
@@ -45,8 +47,18 @@ class Empty:
pass
-Index1D = Union[slice, int, str, np.int64, np.ndarray]
-Index = Union[Index1D, tuple[Index1D, Index1D], scipy.sparse.spmatrix, SpArray]
+Index1D = slice | int | str | np.int64 | np.ndarray
+IndexRest = Index1D | EllipsisType
+Index = (
+ IndexRest
+ | tuple[Index1D, IndexRest]
+ | tuple[IndexRest, Index1D]
+ | tuple[Index1D, Index1D, EllipsisType]
+ | tuple[EllipsisType, Index1D, Index1D]
+ | tuple[Index1D, EllipsisType, Index1D]
+ | scipy.sparse.spmatrix
+ | SpArray
+)
H5Group = h5py.Group
H5Array = h5py.Dataset
H5File = h5py.File
@@ -74,26 +86,14 @@ def __exit__(self, *_exc_info) -> None:
os.chdir(self._old_cwd.pop())
-if sys.version_info >= (3, 10):
- from itertools import pairwise
-else:
-
- def pairwise(iterable):
- from itertools import tee
-
- a, b = tee(iterable)
- next(b, None)
- return zip(a, b)
-
-
#############################
# Optional deps
#############################
-try:
+if find_spec("zarr") or TYPE_CHECKING:
from zarr.core import Array as ZarrArray
from zarr.hierarchy import Group as ZarrGroup
-except ImportError:
+else:
class ZarrArray:
@staticmethod
@@ -106,12 +106,10 @@ def __repr__():
return "mock zarr.core.Group"
-try:
- import awkward
-
- AwkArray = awkward.Array
-
-except ImportError:
+if find_spec("awkward") or TYPE_CHECKING:
+ import awkward # noqa: F401
+ from awkward import Array as AwkArray
+else:
class AwkArray:
@staticmethod
@@ -119,9 +117,9 @@ def __repr__():
return "mock awkward.highlevel.Array"
-try:
+if find_spec("zappy") or TYPE_CHECKING:
from zappy.base import ZappyArray
-except ImportError:
+else:
class ZappyArray:
@staticmethod
@@ -129,9 +127,12 @@ def __repr__():
return "mock zappy.base.ZappyArray"
-try:
+if TYPE_CHECKING:
+ # type checkers are confused and can only see …core.Array
+ from dask.array.core import Array as DaskArray
+elif find_spec("dask"):
from dask.array import Array as DaskArray
-except ImportError:
+else:
class DaskArray:
@staticmethod
@@ -139,27 +140,29 @@ def __repr__():
return "mock dask.array.core.Array"
-try:
+# https://github.com/scverse/anndata/issues/1749
+def is_cupy_importable() -> bool:
+ try:
+ import cupy # noqa: F401
+ except ImportError:
+ return False
+ return True
+
+
+if is_cupy_importable() or TYPE_CHECKING:
from cupy import ndarray as CupyArray
- from cupyx.scipy.sparse import (
- csc_matrix as CupyCSCMatrix,
- )
- from cupyx.scipy.sparse import (
- csr_matrix as CupyCSRMatrix,
- )
- from cupyx.scipy.sparse import (
- spmatrix as CupySparseMatrix,
- )
+ from cupyx.scipy.sparse import csc_matrix as CupyCSCMatrix
+ from cupyx.scipy.sparse import csr_matrix as CupyCSRMatrix
+ from cupyx.scipy.sparse import spmatrix as CupySparseMatrix
try:
import dask.array as da
-
- da.register_chunk_type(CupyCSRMatrix)
- da.register_chunk_type(CupyCSCMatrix)
except ImportError:
pass
-
-except ImportError:
+ else:
+ da.register_chunk_type(CupyCSRMatrix)
+ da.register_chunk_type(CupyCSCMatrix)
+else:
class CupySparseMatrix:
@staticmethod
@@ -293,7 +296,7 @@ def _to_fixed_length_strings(value: np.ndarray) -> np.ndarray:
return value.astype(new_dtype)
-Group_T = TypeVar("Group_T", bound=Union[ZarrGroup, h5py.Group])
+Group_T = TypeVar("Group_T", bound=ZarrGroup | h5py.Group)
# TODO: This is a workaround for https://github.com/scverse/anndata/issues/874
@@ -324,7 +327,7 @@ def _clean_uns(adata: AnnData): # noqa: F821
continue
name = cats_name.replace("_categories", "")
# fix categories with a single category
- if isinstance(cats, (str, int)):
+ if isinstance(cats, str | int):
cats = [cats]
for ann in [adata.obs, adata.var]:
if name not in ann:
@@ -349,7 +352,7 @@ def _move_adj_mtx(d):
for k in ("distances", "connectivities"):
if (
(k in n)
- and isinstance(n[k], (scipy.sparse.spmatrix, np.ndarray))
+ and isinstance(n[k], scipy.sparse.spmatrix | np.ndarray)
and len(n[k].shape) == 2
):
warn(
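The widened `Index` alias above adds `EllipsisType`, so `...` is expected to be accepted as an indexer. A minimal sketch of the intended equivalences (mirroring the conftest fixtures below; file name hypothetical):

```python
# Assuming the new Index alias: ... stands in for the omitted axes,
# so these pairs are expected to select the same subset.
import anndata as ad

adata = ad.read_h5ad("pbmc.h5ad")
assert adata[..., 0:10].shape == adata[:, 0:10].shape
assert adata[0:10, ...].shape == adata[0:10, :].shape
```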
diff --git a/src/anndata/experimental/__init__.py b/src/anndata/experimental/__init__.py
index bf21ed6e7..90e83a87e 100644
--- a/src/anndata/experimental/__init__.py
+++ b/src/anndata/experimental/__init__.py
@@ -1,38 +1,52 @@
from __future__ import annotations
-from anndata._core.sparse_dataset import CSCDataset, CSRDataset, sparse_dataset
-from anndata._io.specs import IOSpec, read_elem, read_elem_as_dask, write_elem
+from types import MappingProxyType
+from typing import TYPE_CHECKING
-from .._types import InMemoryElem as _InMemoryElem
+from .._io.specs import IOSpec, read_elem_as_dask
from .._types import Read, ReadCallback, StorageType, Write, WriteCallback
-from .._types import RWAble as _RWAble
+from ..utils import module_get_attr_redirect
from ._dispatch_io import read_dispatched, write_dispatched
from .merge import concat_on_disk
from .multi_files import AnnCollection
from .pytorch import AnnLoader
-# Sphinx can’t find data docstrings when objects are re-exported
-InMemoryElem = _InMemoryElem
-"""An in-memory element that can be read and written, including an :class:`anndata.AnnData` objects."""
-RWAble = _RWAble
-"""A serializable object, excluding :class:`anndata.AnnData` objects i.e., something that can be stored in `uns` or `obsm`."""
+if TYPE_CHECKING:
+ from typing import Any
+
+
+# Map old name in `anndata.experimental` to new name in `anndata`
+_DEPRECATED = MappingProxyType(
+ dict(
+ (kv if isinstance(kv, tuple) else (kv, kv))
+ for kv in (
+ ("CSRDataset", "abc.CSRDataset"),
+ ("CSCDataset", "abc.CSCDataset"),
+ ("sparse_dataset", "io.sparse_dataset"),
+ ("read_elem", "io.read_elem"),
+ ("write_elem", "io.write_elem"),
+ ("RWAble", "typing.AxisStorable"),
+ ("InMemoryElem", "typing.RWAble"),
+ )
+ )
+)
+
+
+def __getattr__(attr_name: str) -> Any:
+ return module_get_attr_redirect(
+ attr_name, deprecated_mapping=_DEPRECATED, old_module_path="experimental"
+ )
+
__all__ = [
"AnnCollection",
"AnnLoader",
- "read_elem",
- "write_elem",
"read_elem_as_dask",
"read_dispatched",
"write_dispatched",
"IOSpec",
"concat_on_disk",
- "sparse_dataset",
- "CSRDataset",
- "CSCDataset",
- "InMemoryElem",
"Read",
- "RWAble",
"Write",
"ReadCallback",
"WriteCallback",
diff --git a/src/anndata/experimental/_dispatch_io.py b/src/anndata/experimental/_dispatch_io.py
index 20b47baeb..53f94c453 100644
--- a/src/anndata/experimental/_dispatch_io.py
+++ b/src/anndata/experimental/_dispatch_io.py
@@ -9,17 +9,17 @@
from anndata._types import (
GroupStorageType,
- InMemoryElem,
ReadCallback,
StorageType,
WriteCallback,
)
+ from anndata.typing import RWAble
def read_dispatched(
elem: StorageType,
callback: ReadCallback,
-) -> InMemoryElem:
+) -> RWAble:
"""
Read elem, calling the callback at each sub-element.
@@ -45,7 +45,7 @@ def read_dispatched(
def write_dispatched(
store: GroupStorageType,
key: str,
- elem: InMemoryElem,
+ elem: RWAble,
callback: WriteCallback,
*,
dataset_kwargs: Mapping[str, Any] = MappingProxyType({}),
diff --git a/src/anndata/experimental/merge.py b/src/anndata/experimental/merge.py
index 9690420ec..21a678e2c 100644
--- a/src/anndata/experimental/merge.py
+++ b/src/anndata/experimental/merge.py
@@ -352,7 +352,7 @@ def _write_concat_sequence(
)
write_elem(output_group, output_path, df)
elif all(
- isinstance(a, (pd.DataFrame, BaseCompressedSparseDataset, H5Array, ZarrArray))
+ isinstance(a, pd.DataFrame | BaseCompressedSparseDataset | H5Array | ZarrArray)
for a in arrays
):
_write_concat_arrays(
diff --git a/src/anndata/experimental/multi_files/_anncollection.py b/src/anndata/experimental/multi_files/_anncollection.py
index 31b27c879..c5f427f6d 100644
--- a/src/anndata/experimental/multi_files/_anncollection.py
+++ b/src/anndata/experimental/multi_files/_anncollection.py
@@ -3,7 +3,7 @@
import warnings
from collections.abc import Callable, Mapping
from functools import reduce
-from typing import TYPE_CHECKING, Union
+from typing import TYPE_CHECKING
import numpy as np
import pandas as pd
@@ -584,7 +584,7 @@ def attrs_keys(self):
DictCallable = dict[str, Callable]
-ConvertType = Union[Callable, dict[str, Union[Callable, DictCallable]]]
+ConvertType = Callable | dict[str, Callable | DictCallable]
class AnnCollection(_ConcatViewMixin, _IterateViewMixin):
diff --git a/src/anndata/experimental/pytorch/_annloader.py b/src/anndata/experimental/pytorch/_annloader.py
index 8cc883921..cebbe1b5d 100644
--- a/src/anndata/experimental/pytorch/_annloader.py
+++ b/src/anndata/experimental/pytorch/_annloader.py
@@ -2,6 +2,7 @@
from copy import copy
from functools import partial
+from importlib.util import find_spec
from math import ceil
from typing import TYPE_CHECKING
@@ -14,10 +15,10 @@
if TYPE_CHECKING:
from collections.abc import Sequence
-try:
+if find_spec("torch") or TYPE_CHECKING:
import torch
from torch.utils.data import BatchSampler, DataLoader, Sampler
-except ImportError:
+else:
Sampler, BatchSampler, DataLoader = object, object, object
diff --git a/src/anndata/io.py b/src/anndata/io.py
new file mode 100644
index 000000000..5f9ba323c
--- /dev/null
+++ b/src/anndata/io.py
@@ -0,0 +1,48 @@
+from __future__ import annotations
+
+from importlib.util import find_spec
+from typing import TYPE_CHECKING
+
+from ._core.sparse_dataset import sparse_dataset
+from ._io.h5ad import read_h5ad, write_h5ad
+from ._io.read import (
+ read_csv,
+ read_excel,
+ read_hdf,
+ read_loom,
+ read_mtx,
+ read_text,
+ read_umi_tools,
+)
+from ._io.specs import read_elem, write_elem
+from ._io.write import write_csvs, write_loom
+
+if find_spec("zarr") or TYPE_CHECKING:
+ from ._io.zarr import read_zarr, write_zarr
+else: # pragma: no cover
+
+ def read_zarr(*args, **kw):
+ raise ImportError("zarr is not installed")
+
+ def write_zarr(*args, **kw):
+ raise ImportError("zarr is not installed")
+
+
+__all__ = [
+ "read_csv",
+ "read_excel",
+ "read_h5ad",
+ "read_hdf",
+ "read_loom",
+ "read_mtx",
+ "read_text",
+ "read_umi_tools",
+ "read_zarr",
+ "write_csvs",
+ "write_h5ad",
+ "write_loom",
+ "write_zarr",
+ "write_elem",
+ "read_elem",
+ "sparse_dataset",
+]
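The new `anndata.io` module consolidates the public read/write entry points; a short sketch of its use, as exercised in the tests further down (store path and key are hypothetical):

```python
# Minimal round-trip through the consolidated anndata.io namespace.
import h5py
from scipy import sparse

import anndata as ad

with h5py.File("store.h5", "a") as f:
    ad.io.write_elem(f, "mtx", sparse.random(10, 10, format="csr"))
    mtx = ad.io.read_elem(f["mtx"])
```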
diff --git a/src/anndata/logging.py b/src/anndata/logging.py
index a2a890c51..1a0f2e11d 100644
--- a/src/anndata/logging.py
+++ b/src/anndata/logging.py
@@ -31,7 +31,7 @@ def get_memory_usage():
meminfo = process.get_memory_info()
mem = meminfo[0] / 2**30 # output in GB
mem_diff = mem
- global _previous_memory_usage
+ global _previous_memory_usage # noqa: PLW0603
if _previous_memory_usage is not None:
mem_diff = mem - _previous_memory_usage
_previous_memory_usage = mem
diff --git a/src/anndata/tests/helpers.py b/src/anndata/tests/helpers.py
index 185808b8d..6ed637ed8 100644
--- a/src/anndata/tests/helpers.py
+++ b/src/anndata/tests/helpers.py
@@ -4,9 +4,11 @@
import random
import re
import warnings
+from collections import Counter
from collections.abc import Mapping
from contextlib import contextmanager
from functools import partial, singledispatch, wraps
+from importlib.util import find_spec
from string import ascii_letters
from typing import TYPE_CHECKING
@@ -35,8 +37,20 @@
from anndata.utils import asarray
if TYPE_CHECKING:
- from collections.abc import Collection
- from typing import Literal
+ from collections.abc import Callable, Collection, Iterable
+ from typing import Literal, TypeGuard, TypeVar
+
+ DT = TypeVar("DT")
+
+
+try:
+ from pandas.core.arrays.integer import IntegerDtype
+except ImportError:
+ IntegerDtype = (
+ *(pd.Int8Dtype, pd.Int16Dtype, pd.Int32Dtype, pd.Int64Dtype),
+ *(pd.UInt8Dtype, pd.UInt16Dtype, pd.UInt32Dtype, pd.UInt64Dtype),
+ )
+
# Give this to gen_adata when dask array support is expected.
GEN_ADATA_DASK_ARGS = dict(
@@ -45,30 +59,43 @@
np.ndarray,
pd.DataFrame,
DaskArray,
+ *((sparse.csr_array,) if CAN_USE_SPARSE_ARRAY else ()),
),
varm_types=(
sparse.csr_matrix,
np.ndarray,
pd.DataFrame,
DaskArray,
+ *((sparse.csr_array,) if CAN_USE_SPARSE_ARRAY else ()),
),
layers_types=(
sparse.csr_matrix,
np.ndarray,
pd.DataFrame,
DaskArray,
+ *((sparse.csr_array,) if CAN_USE_SPARSE_ARRAY else ()),
),
)
-if CAN_USE_SPARSE_ARRAY:
- GEN_ADATA_DASK_ARGS["obsm_types"] = GEN_ADATA_DASK_ARGS["obsm_types"] + (
- sparse.csr_array,
- )
- GEN_ADATA_DASK_ARGS["varm_types"] = GEN_ADATA_DASK_ARGS["varm_types"] + (
- sparse.csr_array,
- )
- GEN_ADATA_DASK_ARGS["layers_types"] = GEN_ADATA_DASK_ARGS["layers_types"] + (
- sparse.csr_array,
- )
+
+
+DEFAULT_KEY_TYPES = (
+ sparse.csr_matrix,
+ np.ndarray,
+ pd.DataFrame,
+ *((sparse.csr_array,) if CAN_USE_SPARSE_ARRAY else ()),
+)
+
+
+DEFAULT_COL_TYPES = (
+ pd.CategoricalDtype(ordered=False),
+ pd.CategoricalDtype(ordered=True),
+ np.int64,
+ np.float64,
+ np.uint8,
+ np.bool_,
+ pd.BooleanDtype,
+ pd.Int32Dtype,
+)
def gen_vstr_recarray(m, n, dtype=None):
@@ -82,30 +109,82 @@ def gen_vstr_recarray(m, n, dtype=None):
)
-def gen_typed_df(n, index=None):
- # TODO: Think about allowing index to be passed for n
- letters = np.fromiter(iter(ascii_letters), "U1")
- if n > len(letters):
- letters = letters[: n // 2] # Make sure categories are repeated
- return pd.DataFrame(
- {
- "cat": pd.Categorical(np.random.choice(letters, n)),
- "cat_ordered": pd.Categorical(np.random.choice(letters, n), ordered=True),
- "int64": np.random.randint(-50, 50, n),
- "float64": np.random.random(n),
- "uint8": np.random.randint(255, size=n, dtype="uint8"),
- "bool": np.random.randint(0, 2, size=n, dtype=bool),
- "nullable-bool": pd.arrays.BooleanArray(
+def issubdtype(
+ a: np.dtype | pd.api.extensions.ExtensionDtype | type,
+ b: type[DT] | tuple[type[DT], ...],
+) -> TypeGuard[DT]:
+ if isinstance(b, tuple):
+ return any(issubdtype(a, t) for t in b)
+ if isinstance(a, type) and issubclass(a, pd.api.extensions.ExtensionDtype):
+ return issubclass(a, b)
+ if isinstance(a, pd.api.extensions.ExtensionDtype):
+ return isinstance(a, b)
+ try:
+ return np.issubdtype(a, b)
+ except TypeError: # pragma: no cover
+ pytest.fail(f"issubdtype can’t handle everything yet: {a} {b}")
+
+
+def gen_random_column(
+ n: int, dtype: np.dtype | pd.api.extensions.ExtensionDtype
+) -> tuple[str, np.ndarray | pd.api.extensions.ExtensionArray]:
+ if issubdtype(dtype, pd.CategoricalDtype):
+ # TODO: Think about allowing index to be passed for n
+ letters = np.fromiter(iter(ascii_letters), "U1")
+ if n > len(letters):
+ letters = letters[: n // 2] # Make sure categories are repeated
+ key = "cat" if dtype.ordered else "cat_unordered"
+ return key, pd.Categorical(np.random.choice(letters, n), dtype=dtype)
+ if issubdtype(dtype, pd.BooleanDtype):
+ return (
+ "nullable-bool",
+ pd.arrays.BooleanArray(
np.random.randint(0, 2, size=n, dtype=bool),
mask=np.random.randint(0, 2, size=n, dtype=bool),
),
- "nullable-int": pd.arrays.IntegerArray(
+ )
+ if issubdtype(dtype, IntegerDtype):
+ return (
+ "nullable-int",
+ pd.arrays.IntegerArray(
np.random.randint(0, 1000, size=n, dtype=np.int32),
mask=np.random.randint(0, 2, size=n, dtype=bool),
),
- },
- index=index,
- )
+ )
+ if issubdtype(dtype, pd.StringDtype):
+ letters = np.fromiter(iter(ascii_letters), "U1")
+        array = pd.array(np.random.choice(letters, n), dtype=dtype)
+ array[np.random.randint(0, 2, size=n, dtype=bool)] = pd.NA
+ return "string", array
+ # if issubdtype(dtype, pd.DatetimeTZDtype):
+ # return "datetime", pd.to_datetime(np.random.randint(0, 1000, size=n))
+ if issubdtype(dtype, np.bool_):
+ return "bool", np.random.randint(0, 2, size=n, dtype=dtype)
+
+ if not issubdtype(dtype, np.number): # pragma: no cover
+ pytest.fail(f"Unexpected dtype: {dtype}")
+
+ n_bits = 8 * (dtype().itemsize if isinstance(dtype, type) else dtype.itemsize)
+
+ if issubdtype(dtype, np.unsignedinteger):
+ return f"uint{n_bits}", np.random.randint(0, 255, n, dtype=dtype)
+ if issubdtype(dtype, np.signedinteger):
+ return f"int{n_bits}", np.random.randint(-50, 50, n, dtype=dtype)
+ if issubdtype(dtype, np.floating):
+ return f"float{n_bits}", np.random.random(n).astype(dtype)
+
+ pytest.fail(f"Unexpected numeric dtype: {dtype}") # pragma: no cover
+
+
+def gen_typed_df(
+ n: int,
+ index: pd.Index[str] | None = None,
+ dtypes: Collection[np.dtype | pd.api.extensions.ExtensionDtype] = DEFAULT_COL_TYPES,
+):
+ columns = [gen_random_column(n, dtype) for dtype in dtypes]
+ col_names = [n for n, _ in columns]
+ assert len(col_names) == len(set(col_names)), "Duplicate column names generated!"
+ return pd.DataFrame(dict(columns), index=index)
def _gen_awkward_inner(shape, rng, dtype):
@@ -182,20 +261,11 @@ def gen_typed_df_t2_size(m, n, index=None, columns=None) -> pd.DataFrame:
return df
-default_key_types = (
- sparse.csr_matrix,
- np.ndarray,
- pd.DataFrame,
-)
-if CAN_USE_SPARSE_ARRAY:
- default_key_types = default_key_types + (sparse.csr_array,)
-
-
def maybe_add_sparse_array(
mapping: Mapping,
types: Collection[type],
format: Literal["csr", "csc"],
- random_state: int,
+ random_state: np.random.Generator,
shape: tuple[int, int],
):
if CAN_USE_SPARSE_ARRAY:
@@ -209,15 +279,20 @@ def maybe_add_sparse_array(
# TODO: Use hypothesis for this?
def gen_adata(
shape: tuple[int, int],
- X_type=sparse.csr_matrix,
- X_dtype=np.float32,
- # obs_dtypes,
- # var_dtypes,
- obsm_types: Collection[type] = default_key_types + (AwkArray,),
- varm_types: Collection[type] = default_key_types + (AwkArray,),
- layers_types: Collection[type] = default_key_types,
- random_state=None,
- sparse_fmt: str = "csr",
+ X_type: Callable[[np.ndarray], object] = sparse.csr_matrix,
+ *,
+ X_dtype: np.dtype = np.float32,
+ obs_dtypes: Collection[
+ np.dtype | pd.api.extensions.ExtensionDtype
+ ] = DEFAULT_COL_TYPES,
+ var_dtypes: Collection[
+ np.dtype | pd.api.extensions.ExtensionDtype
+ ] = DEFAULT_COL_TYPES,
+ obsm_types: Collection[type] = DEFAULT_KEY_TYPES + (AwkArray,),
+ varm_types: Collection[type] = DEFAULT_KEY_TYPES + (AwkArray,),
+ layers_types: Collection[type] = DEFAULT_KEY_TYPES,
+ random_state: np.random.Generator | None = None,
+ sparse_fmt: Literal["csr", "csc"] = "csr",
) -> AnnData:
"""\
Helper function to generate a random AnnData for testing purposes.
@@ -253,8 +328,8 @@ def gen_adata(
M, N = shape
obs_names = pd.Index(f"cell{i}" for i in range(shape[0]))
var_names = pd.Index(f"gene{i}" for i in range(shape[1]))
- obs = gen_typed_df(M, obs_names)
- var = gen_typed_df(N, var_names)
+ obs = gen_typed_df(M, obs_names, dtypes=obs_dtypes)
+ var = gen_typed_df(N, var_names, dtypes=var_dtypes)
# For #147
obs.rename(columns=dict(cat="obs_cat"), inplace=True)
var.rename(columns=dict(cat="var_cat"), inplace=True)
@@ -267,7 +342,7 @@ def gen_adata(
obsm = dict(
array=np.random.random((M, 50)),
sparse=sparse.random(M, 100, format=sparse_fmt, random_state=random_state),
- df=gen_typed_df(M, obs_names),
+ df=gen_typed_df(M, obs_names, dtypes=obs_dtypes),
awk_2d_ragged=gen_awkward((M, None)),
da=da.random.random((M, 50)),
)
@@ -282,7 +357,7 @@ def gen_adata(
varm = dict(
array=np.random.random((N, 50)),
sparse=sparse.random(N, 100, format=sparse_fmt, random_state=random_state),
- df=gen_typed_df(N, var_names),
+ df=gen_typed_df(N, var_names, dtypes=var_dtypes),
awk_2d_ragged=gen_awkward((N, None)),
da=da.random.random((N, 50)),
)
@@ -964,46 +1039,49 @@ def shares_memory_sparse(x, y):
),
]
-try:
- import zarr
+if find_spec("zarr") or TYPE_CHECKING:
+ from zarr import DirectoryStore
+else:
- class AccessTrackingStore(zarr.DirectoryStore):
- def __init__(self, *args, **kwargs):
- super().__init__(*args, **kwargs)
- self._access_count = {}
- self._accessed_keys = {}
+ class DirectoryStore:
+ def __init__(self, *_args, **_kwargs) -> None:
+ cls_name = type(self).__name__
+ msg = f"zarr must be imported to create a {cls_name} instance."
+ raise ImportError(msg)
- def __getitem__(self, key):
- for tracked in self._access_count:
- if tracked in key:
- self._access_count[tracked] += 1
- self._accessed_keys[tracked] += [key]
- return super().__getitem__(key)
- def get_access_count(self, key):
- return self._access_count[key]
+class AccessTrackingStore(DirectoryStore):
+ _access_count: Counter[str]
+ _accessed_keys: dict[str, list[str]]
- def get_accessed_keys(self, key):
- return self._accessed_keys[key]
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self._access_count = Counter()
+ self._accessed_keys = {}
- def initialize_key_trackers(self, keys_to_track):
- for k in keys_to_track:
- self._access_count[k] = 0
- self._accessed_keys[k] = []
+ def __getitem__(self, key: str) -> object:
+ for tracked in self._access_count:
+ if tracked in key:
+ self._access_count[tracked] += 1
+ self._accessed_keys[tracked] += [key]
+ return super().__getitem__(key)
- def reset_key_trackers(self):
- self.initialize_key_trackers(self._access_count.keys())
+ def get_access_count(self, key: str) -> int:
+ return self._access_count[key]
-except ImportError:
+ def get_accessed_keys(self, key: str) -> list[str]:
+ return self._accessed_keys[key]
- class AccessTrackingStore:
- def __init__(self, *_args, **_kwargs) -> None:
- raise ImportError(
- "zarr must be imported to create an `AccessTrackingStore` instance."
- )
+ def initialize_key_trackers(self, keys_to_track: Iterable[str]) -> None:
+ for k in keys_to_track:
+ self._access_count[k] = 0
+ self._accessed_keys[k] = []
+
+ def reset_key_trackers(self) -> None:
+ self.initialize_key_trackers(self._access_count.keys())
-def get_multiindex_columns_df(shape):
+def get_multiindex_columns_df(shape: tuple[int, int]) -> pd.DataFrame:
return pd.DataFrame(
np.random.rand(shape[0], shape[1]),
columns=pd.MultiIndex.from_tuples(
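The reworked helpers expose the column dtypes as keyword-only knobs on `gen_adata`; a hedged sketch of the intended call pattern, mirroring its use in the concat tests below (`pd.StringDtype` added on top of the defaults):

```python
# Sketch: generate a test AnnData whose obs/var include a nullable string column.
import pandas as pd

from anndata.tests.helpers import DEFAULT_COL_TYPES, gen_adata

dtypes = (*DEFAULT_COL_TYPES, pd.StringDtype)
adata = gen_adata((20, 10), obs_dtypes=dtypes, var_dtypes=dtypes)
assert "string" in adata.obs.columns  # key produced by gen_random_column
```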
diff --git a/src/anndata/typing.py b/src/anndata/typing.py
new file mode 100644
index 000000000..d13927bad
--- /dev/null
+++ b/src/anndata/typing.py
@@ -0,0 +1,66 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import numpy as np
+import pandas as pd
+from numpy import ma
+from scipy import sparse
+
+from . import abc
+from ._core.anndata import AnnData
+from .compat import (
+ AwkArray,
+ CupyArray,
+ CupySparseMatrix,
+ DaskArray,
+ H5Array,
+ SpArray,
+ ZappyArray,
+ ZarrArray,
+)
+from .compat import Index as _Index
+
+if TYPE_CHECKING:
+ from typing import TypeAlias
+
+
+__all__ = ["Index", "RWAble", "AxisStorable"]
+
+
+Index = _Index
+"""1D or 2D index an :class:`~anndata.AnnData` object can be sliced with."""
+
+
+ArrayDataStructureType: TypeAlias = (
+ np.ndarray
+ | ma.MaskedArray
+ | sparse.csr_matrix
+ | sparse.csc_matrix
+ | SpArray
+ | AwkArray
+ | H5Array
+ | ZarrArray
+ | ZappyArray
+ | abc.CSRDataset
+ | abc.CSCDataset
+ | DaskArray
+ | CupyArray
+ | CupySparseMatrix
+)
+
+
+InMemoryArrayOrScalarType: TypeAlias = (
+ pd.DataFrame | np.number | str | ArrayDataStructureType
+)
+
+
+AxisStorable: TypeAlias = (
+ InMemoryArrayOrScalarType | dict[str, "AxisStorable"] | list["AxisStorable"]
+)
+"""A serializable object, excluding :class:`anndata.AnnData` objects i.e., something that can be stored in `uns` or `obsm`."""
+
+RWAble: TypeAlias = (
+ AxisStorable | AnnData | pd.Categorical | pd.api.extensions.ExtensionArray
+)
+"""A superset of :type:`anndata.typing.AxisStorable` (i.e., including :class:`anndata.AnnData`) which is everything can be read/written by :func:`anndata.io.read_elem` and :func:`anndata.io.write_elem`."""
diff --git a/src/anndata/utils.py b/src/anndata/utils.py
index 3ff844054..60dffa87f 100644
--- a/src/anndata/utils.py
+++ b/src/anndata/utils.py
@@ -10,6 +10,8 @@
import pandas as pd
from scipy import sparse
+import anndata
+
from ._core.sparse_dataset import BaseCompressedSparseDataset
from .compat import CupyArray, CupySparseMatrix, DaskArray, SpArray
from .logging import get_logger
@@ -409,3 +411,27 @@ def raise_value_error_if_multiindex_columns(df: pd.DataFrame, attr: str):
f"Please use a single-level index for {attr}."
)
raise ValueError(msg)
+
+
+def module_get_attr_redirect(
+ attr_name: str,
+ deprecated_mapping: Mapping[str, str],
+ old_module_path: str | None = None,
+) -> Any:
+ full_old_module_path = (
+ f"anndata{'.' + old_module_path if old_module_path is not None else ''}"
+ )
+ if new_path := deprecated_mapping.get(attr_name):
+ msg = (
+ f"Importing {attr_name} from `{full_old_module_path}` is deprecated. "
+ f"Import anndata.{new_path} instead."
+ )
+ warnings.warn(msg, FutureWarning)
+ # hacky import_object_by_name, but we test all these
+ mod = anndata
+ while "." in new_path:
+ mod_name, new_path = new_path.split(".", 1)
+ mod = getattr(mod, mod_name)
+ return getattr(mod, new_path)
+ msg = f"module {full_old_module_path} has no attribute {attr_name!r}"
+ raise AttributeError(msg)
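A sketch of how a submodule wires this helper into a module-level `__getattr__`, following the pattern in `anndata/experimental/__init__.py` above (the mapping here is hypothetical):

```python
# Sketch: deprecated-name redirect for a submodule.
from types import MappingProxyType
from typing import Any

from anndata.utils import module_get_attr_redirect

_DEPRECATED = MappingProxyType({"read_elem": "io.read_elem"})


def __getattr__(attr_name: str) -> Any:
    return module_get_attr_redirect(attr_name, deprecated_mapping=_DEPRECATED)
```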
diff --git a/src/testing/anndata/_pytest.py b/src/testing/anndata/_pytest.py
index d29ac334e..5b0fd60e0 100644
--- a/src/testing/anndata/_pytest.py
+++ b/src/testing/anndata/_pytest.py
@@ -32,16 +32,24 @@ def pytest_configure(config: pytest.Config) -> None:
@pytest.fixture(autouse=True)
-def _suppress_env_for_doctests(request: pytest.FixtureRequest) -> None:
+def _anndata_test_env(request: pytest.FixtureRequest) -> None:
+ import anndata
+
if isinstance(request.node, pytest.DoctestItem):
request.getfixturevalue("_doctest_env")
+ anndata.settings.reset(anndata.settings._registered_options.keys())
+
@pytest.fixture
def _doctest_env(
request: pytest.FixtureRequest, cache: pytest.Cache, tmp_path: Path
) -> Generator[None, None, None]:
- from scanpy import settings
+ with warnings.catch_warnings():
+ warnings.filterwarnings(
+ "ignore", message=r"Importing read_.* from `anndata` is deprecated"
+ )
+ from scanpy import settings
from anndata.compat import chdir
from anndata.utils import import_name
diff --git a/tests/conftest.py b/tests/conftest.py
index 65eff92b1..9054812f5 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,21 +1,80 @@
from __future__ import annotations
from functools import partial
+from typing import TYPE_CHECKING
+import dask
import joblib
import pytest
-from dask.base import normalize_seq, normalize_token, tokenize
+from dask.base import normalize_token, tokenize
+from packaging.version import Version
+
+if Version(dask.__version__) < Version("2024.8.0"):
+ from dask.base import normalize_seq
+else:
+ from dask.tokenize import normalize_seq
from scipy import sparse
import anndata as ad
from anndata.tests.helpers import subset_func # noqa: F401
+if TYPE_CHECKING:
+ from types import EllipsisType
+
@pytest.fixture
def backing_h5ad(tmp_path):
return tmp_path / "test.h5ad"
+@pytest.fixture(
+ params=[
+ pytest.param((..., (slice(None), slice(None))), id="ellipsis"),
+ pytest.param(((...,), (slice(None), slice(None))), id="ellipsis_tuple"),
+ pytest.param(
+ ((..., slice(0, 10)), (slice(None), slice(0, 10))), id="obs-ellipsis"
+ ),
+ pytest.param(
+ ((slice(0, 10), ...), (slice(0, 10), slice(None))), id="var-ellipsis"
+ ),
+ pytest.param(
+ ((slice(0, 10), slice(0, 10), ...), (slice(0, 10), slice(0, 10))),
+ id="obs-var-ellipsis",
+ ),
+ pytest.param(
+ ((..., slice(0, 10), slice(0, 10)), (slice(0, 10), slice(0, 10))),
+ id="ellipsis-obs-var",
+ ),
+ pytest.param(
+ ((slice(0, 10), ..., slice(0, 10)), (slice(0, 10), slice(0, 10))),
+ id="obs-ellipsis-var",
+ ),
+ ]
+)
+def ellipsis_index_with_equivalent(
+ request,
+) -> tuple[tuple[EllipsisType | slice, ...] | EllipsisType, tuple[slice, slice]]:
+ return request.param
+
+
+@pytest.fixture
+def ellipsis_index(
+ ellipsis_index_with_equivalent: tuple[
+ tuple[EllipsisType | slice, ...] | EllipsisType, tuple[slice, slice]
+ ],
+) -> tuple[EllipsisType | slice, ...] | EllipsisType:
+ return ellipsis_index_with_equivalent[0]
+
+
+@pytest.fixture
+def equivalent_ellipsis_index(
+ ellipsis_index_with_equivalent: tuple[
+ tuple[EllipsisType | slice, ...] | EllipsisType, tuple[slice, slice]
+ ],
+) -> tuple[slice, slice]:
+ return ellipsis_index_with_equivalent[1]
+
+
#####################
# Dask tokenization #
#####################
diff --git a/tests/test_awkward.py b/tests/test_awkward.py
index 0e2254afe..4b3f81d8e 100644
--- a/tests/test_awkward.py
+++ b/tests/test_awkward.py
@@ -15,6 +15,7 @@
ImplicitModificationWarning,
read_h5ad,
)
+from anndata.compat import AwkArray
from anndata.compat import awkward as ak
from anndata.tests.helpers import assert_equal, gen_adata, gen_awkward
from anndata.utils import axis_len
@@ -249,6 +250,22 @@ def test_awkward_io(tmp_path, array):
assert_equal(adata.uns["awk"], adata2.uns["awk"], exact=True)
+def test_awkward_io_view(tmp_path):
+ """Check that views are converted to actual arrays on save, i.e. the _view_args and __list__ parameters are removed"""
+ adata = gen_adata((3, 3), varm_types=(), obsm_types=(AwkArray,), layers_types=())
+
+ v = adata[1:]
+ adata_path = tmp_path / "adata.h5ad"
+ v.write_h5ad(adata_path)
+
+ adata2 = read_h5ad(adata_path)
+ # parameters are not fully removed, but set to None
+ assert ak.parameters(adata2.obsm["awk_2d_ragged"]) == {
+ "__list__": None,
+ "_view_args": None,
+ }
+
+
# @pytest.mark.parametrize("join", ["outer", "inner"])
@pytest.mark.parametrize(
("arrays", "join", "expected"),
diff --git a/tests/test_backed_dense.py b/tests/test_backed_dense.py
index 796cad511..3fc19d88a 100644
--- a/tests/test_backed_dense.py
+++ b/tests/test_backed_dense.py
@@ -10,7 +10,7 @@
import zarr
from anndata import AnnData
-from anndata._io.specs import write_elem
+from anndata.io import write_elem
from anndata.tests.helpers import assert_equal
if TYPE_CHECKING:
diff --git a/tests/test_backed_hdf5.py b/tests/test_backed_hdf5.py
index 6cb449e28..19b4ca44d 100644
--- a/tests/test_backed_hdf5.py
+++ b/tests/test_backed_hdf5.py
@@ -200,8 +200,8 @@ def test_backed_raw_subset(tmp_path, array_type, subset_func, subset_func2):
var_idx = subset_func2(mem_adata.var_names)
if (
array_type is asarray
- and isinstance(obs_idx, (list, np.ndarray, sparse.spmatrix, SpArray))
- and isinstance(var_idx, (list, np.ndarray, sparse.spmatrix, SpArray))
+ and isinstance(obs_idx, list | np.ndarray | sparse.spmatrix | SpArray)
+ and isinstance(var_idx, list | np.ndarray | sparse.spmatrix | SpArray)
):
pytest.xfail(
"Fancy indexing does not work with multiple arrays on a h5py.Dataset"
diff --git a/tests/test_backed_sparse.py b/tests/test_backed_sparse.py
index 36a725bf2..2778c76bb 100644
--- a/tests/test_backed_sparse.py
+++ b/tests/test_backed_sparse.py
@@ -14,12 +14,13 @@
from anndata._core.anndata import AnnData
from anndata._core.sparse_dataset import sparse_dataset
from anndata.compat import CAN_USE_SPARSE_ARRAY, SpArray
-from anndata.experimental import read_dispatched, write_elem
+from anndata.experimental import read_dispatched
from anndata.tests.helpers import AccessTrackingStore, assert_equal, subset_func
if TYPE_CHECKING:
from collections.abc import Callable, Generator, Sequence
from pathlib import Path
+ from types import EllipsisType
from _pytest.mark import ParameterSet
from numpy.typing import ArrayLike, NDArray
@@ -127,6 +128,17 @@ def test_backed_indexing(
assert_equal(csr_mem[:, var_idx].X, dense_disk[:, var_idx].X)
+def test_backed_ellipsis_indexing(
+ ondisk_equivalent_adata: tuple[AnnData, AnnData, AnnData, AnnData],
+ ellipsis_index: tuple[EllipsisType | slice, ...] | EllipsisType,
+ equivalent_ellipsis_index: tuple[slice, slice],
+):
+ csr_mem, csr_disk, csc_disk, _ = ondisk_equivalent_adata
+
+ assert_equal(csr_mem.X[equivalent_ellipsis_index], csr_disk.X[ellipsis_index])
+ assert_equal(csr_mem.X[equivalent_ellipsis_index], csc_disk.X[ellipsis_index])
+
+
def make_randomized_mask(size: int) -> np.ndarray:
randomized_mask = np.zeros(size, dtype=bool)
inds = np.random.choice(size, 20, replace=False)
@@ -258,7 +270,7 @@ def test_dataset_append_memory(
f = zarr.open_group(path, "a")
else:
f = h5py.File(path, "a")
- ad._io.specs.write_elem(f, "mtx", a)
+ ad.io.write_elem(f, "mtx", a)
diskmtx = sparse_dataset(f["mtx"])
diskmtx.append(b)
@@ -269,6 +281,44 @@ def test_dataset_append_memory(
assert_equal(fromdisk, frommem)
+@pytest.mark.parametrize("sparse_format", [sparse.csr_matrix, sparse.csc_matrix])
+@pytest.mark.parametrize(
+ ("subset_func", "subset_func2"),
+ product(
+ [
+ ad.tests.helpers.array_subset,
+ ad.tests.helpers.slice_subset,
+ ad.tests.helpers.array_int_subset,
+ ad.tests.helpers.array_bool_subset,
+ ],
+ repeat=2,
+ ),
+)
+def test_read_array(
+ tmp_path: Path,
+ sparse_format: Callable[[ArrayLike], sparse.spmatrix],
+ diskfmt: Literal["h5ad", "zarr"],
+ subset_func,
+ subset_func2,
+):
+ path = tmp_path / f"test.{diskfmt.replace('ad', '')}"
+ a = sparse_format(sparse.random(100, 100))
+ obs_idx = subset_func(np.arange(100))
+ var_idx = subset_func2(np.arange(100))
+ if diskfmt == "zarr":
+ f = zarr.open_group(path, "a")
+ else:
+ f = h5py.File(path, "a")
+ ad.io.write_elem(f, "mtx", a)
+ diskmtx = sparse_dataset(f["mtx"])
+ if not CAN_USE_SPARSE_ARRAY:
+ pytest.skip("scipy.sparse.cs{r,c}array not available")
+ ad.settings.use_sparse_array_on_read = True
+ assert issubclass(type(diskmtx[obs_idx, var_idx]), SpArray)
+ ad.settings.use_sparse_array_on_read = False
+ assert issubclass(type(diskmtx[obs_idx, var_idx]), sparse.spmatrix)
+
+
@pytest.mark.parametrize(
("sparse_format", "append_method"),
[
@@ -290,8 +340,8 @@ def test_dataset_append_disk(
f = zarr.open_group(path, "a")
else:
f = h5py.File(path, "a")
- ad._io.specs.write_elem(f, "a", a)
- ad._io.specs.write_elem(f, "b", b)
+ ad.io.write_elem(f, "a", a)
+ ad.io.write_elem(f, "b", b)
a_disk = sparse_dataset(f["a"])
b_disk = sparse_dataset(f["b"])
@@ -311,7 +361,7 @@ def test_indptr_cache(
path = tmp_path / "test.zarr"
a = sparse_format(sparse.random(10, 10))
f = zarr.open_group(path, "a")
- ad._io.specs.write_elem(f, "X", a)
+ ad.io.write_elem(f, "X", a)
store = AccessTrackingStore(path)
store.initialize_key_trackers(["X/indptr"])
f = zarr.open_group(store, "a")
@@ -396,7 +446,7 @@ def test_data_access(
path = tmp_path / "test.zarr"
a = sparse_format(np.eye(10, 10))
f = zarr.open_group(path, "a")
- ad._io.specs.write_elem(f, "X", a)
+ ad.io.write_elem(f, "X", a)
data = f["X/data"][...]
del f["X/data"]
# chunk one at a time to count properly
@@ -442,8 +492,8 @@ def test_wrong_shape(
else:
f = h5py.File(path, "a")
- ad._io.specs.write_elem(f, "a", a_mem)
- ad._io.specs.write_elem(f, "b", b_mem)
+ ad.io.write_elem(f, "a", a_mem)
+ ad.io.write_elem(f, "b", b_mem)
a_disk = sparse_dataset(f["a"])
b_disk = sparse_dataset(f["b"])
@@ -460,7 +510,7 @@ def test_reset_group(tmp_path: Path):
else:
f = h5py.File(path, "a")
- ad._io.specs.write_elem(f, "base", base)
+ ad.io.write_elem(f, "base", base)
disk_mtx = sparse_dataset(f["base"])
with pytest.raises(AttributeError):
disk_mtx.group = f
@@ -475,7 +525,7 @@ def test_wrong_formats(tmp_path: Path, diskfmt: Literal["h5ad", "zarr"]):
else:
f = h5py.File(path, "a")
- ad._io.specs.write_elem(f, "base", base)
+ ad.io.write_elem(f, "base", base)
disk_mtx = sparse_dataset(f["base"])
pre_checks = disk_mtx.to_memory()
@@ -504,7 +554,7 @@ def test_anndata_sparse_compat(tmp_path: Path, diskfmt: Literal["h5ad", "zarr"])
else:
f = h5py.File(path, "a")
- ad._io.specs.write_elem(f, "/", base)
+ ad.io.write_elem(f, "/", base)
adata = ad.AnnData(sparse_dataset(f["/"]))
assert_equal(adata.X, base)
@@ -545,11 +595,11 @@ def test_append_overflow_check(group_fn, sparse_class, tmpdir):
shape=(1, 2),
)
- write_elem(group, "mtx", orig_mtx)
+ ad.io.write_elem(group, "mtx", orig_mtx)
backed = sparse_dataset(group["mtx"])
# Checking for correct caching behaviour
- backed.indptr
+ backed._indptr
with pytest.raises(
OverflowError,
diff --git a/tests/test_base.py b/tests/test_base.py
index 277e8c8ab..e1401ed74 100644
--- a/tests/test_base.py
+++ b/tests/test_base.py
@@ -30,7 +30,7 @@ def test_creation():
AnnData(np.array([[1, 2], [3, 4]]))
AnnData(np.array([[1, 2], [3, 4]]), {}, {})
AnnData(ma.array([[1, 2], [3, 4]]), uns=dict(mask=[0, 1, 1, 0]))
- AnnData(sp.eye(2))
+ AnnData(sp.eye(2, format="csr"))
if CAN_USE_SPARSE_ARRAY:
AnnData(sp.eye_array(2))
X = np.array([[1, 2, 3], [4, 5, 6]])
@@ -95,7 +95,7 @@ def test_creation_error(src, src_arg, dim_msg, dim, dim_arg, msg: str | None):
def test_invalid_X():
with pytest.raises(
ValueError,
- match=r"X needs to be of one of np\.ndarray.*not \.",
+ match=r"X needs to be of one of .*not \.",
):
AnnData("string is not a valid X")
@@ -126,7 +126,7 @@ def test_error_create_from_multiindex_df(attr):
def test_create_from_sparse_df():
- s = sp.random(20, 30, density=0.2)
+ s = sp.random(20, 30, density=0.2, format="csr")
obs_names = [f"obs{i}" for i in range(20)]
var_names = [f"var{i}" for i in range(30)]
df = pd.DataFrame.sparse.from_spmatrix(s, index=obs_names, columns=var_names)
@@ -277,7 +277,7 @@ def test_setting_dim_index(dim):
mapping_attr = f"{dim}m"
orig = gen_adata((5, 5))
- orig.raw = orig
+ orig.raw = orig.copy()
curr = orig.copy()
view = orig[:, :]
new_idx = pd.Index(list("abcde"), name="letters")
@@ -453,7 +453,7 @@ def test_slicing_remove_unused_categories():
def test_slicing_dont_remove_unused_categories():
- with settings.override(should_remove_unused_categories=False):
+ with settings.override(remove_unused_categories=False):
adata = AnnData(
np.array([[1, 2], [3, 4], [5, 6], [7, 8]]), dict(k=["a", "a", "b", "b"])
)
@@ -462,7 +462,7 @@ def test_slicing_dont_remove_unused_categories():
def test_no_uniqueness_check_gives_repeat_indices():
- with settings.override(should_check_uniqueness=False):
+ with settings.override(check_uniqueness=False):
obs_names = ["0", "0", "1", "1"]
with warnings.catch_warnings():
warnings.simplefilter("error")
@@ -590,7 +590,7 @@ def test_convenience():
adata = adata_sparse.copy()
adata.layers["x2"] = adata.X * 2
adata.var["anno2"] = ["p1", "p2", "p3"]
- adata.raw = adata
+ adata.raw = adata.copy()
adata.X = adata.X / 2
adata_dense = adata.copy()
adata_dense.X = adata_dense.X.toarray()
diff --git a/tests/test_concatenate.py b/tests/test_concatenate.py
index 03126284b..e034debd2 100644
--- a/tests/test_concatenate.py
+++ b/tests/test_concatenate.py
@@ -26,6 +26,7 @@
BASE_MATRIX_PARAMS,
CUPY_MATRIX_PARAMS,
DASK_MATRIX_PARAMS,
+ DEFAULT_COL_TYPES,
GEN_ADATA_DASK_ARGS,
as_dense_dask_array,
assert_equal,
@@ -494,19 +495,19 @@ def get_obs_els(adata):
adata1.obsm = {
k: v
for k, v in adata1.obsm.items()
- if not isinstance(v, (pd.DataFrame, AwkArray))
+ if not isinstance(v, pd.DataFrame | AwkArray)
}
adata2 = gen_adata((10, 5))
adata2.obsm = {
k: v[:, : v.shape[1] // 2]
for k, v in adata2.obsm.items()
- if not isinstance(v, (pd.DataFrame, AwkArray))
+ if not isinstance(v, pd.DataFrame | AwkArray)
}
adata3 = gen_adata((7, 3))
adata3.obsm = {
k: v[:, : v.shape[1] // 3]
for k, v in adata3.obsm.items()
- if not isinstance(v, (pd.DataFrame, AwkArray))
+ if not isinstance(v, pd.DataFrame | AwkArray)
}
# remove AwkArrays from adata.var, as outer joins are not yet implemented for them
for tmp_ad in [adata1, adata2, adata3]:
@@ -696,9 +697,9 @@ def test_concatenate_with_raw():
layers=dict(Xs=X4),
)
- adata1.raw = adata1
- adata2.raw = adata2
- adata3.raw = adata3
+ adata1.raw = adata1.copy()
+ adata2.raw = adata2.copy()
+ adata3.raw = adata3.copy()
adata_all = AnnData.concatenate(adata1, adata2, adata3)
assert isinstance(adata_all.raw, Raw)
@@ -712,7 +713,7 @@ def test_concatenate_with_raw():
assert_equal(adata_all.raw.to_adata().obs, adata_all.obs)
assert np.array_equal(np.nan_to_num(adata_all.raw.X), np.nan_to_num(adata_all.X))
- adata3.raw = adata4
+ adata3.raw = adata4.copy()
adata_all = AnnData.concatenate(adata1, adata2, adata3, join="outer")
assert isinstance(adata_all.raw, Raw)
assert set(adata_all.raw.var_names) == set("abcdz")
@@ -1375,8 +1376,9 @@ def test_concat_size_0_axis(axis_name, join_type, merge_strategy, shape):
"""Regression test for https://github.com/scverse/anndata/issues/526"""
axis, axis_name = merge._resolve_axis(axis_name)
alt_axis = 1 - axis
- a = gen_adata((5, 7))
- b = gen_adata(shape)
+ col_dtypes = (*DEFAULT_COL_TYPES, pd.StringDtype)
+ a = gen_adata((5, 7), obs_dtypes=col_dtypes, var_dtypes=col_dtypes)
+ b = gen_adata(shape, obs_dtypes=col_dtypes, var_dtypes=col_dtypes)
expected_size = expected_shape(a, b, axis=axis, join=join_type)
@@ -1633,3 +1635,23 @@ def test_concat_on_var_outer_join(array_type):
# This shouldn't error
# TODO: specify expected result while accounting for null value
_ = concat([a, b], join="outer", axis=1)
+
+
+def test_concat_dask_sparse_matches_memory(join_type, merge_strategy):
+ import dask.array as da
+
+ X = sparse.random(50, 20, density=0.5, format="csr")
+ X_dask = da.from_array(X, chunks=(5, 20))
+ var_names_1 = [f"gene_{i}" for i in range(20)]
+ var_names_2 = [f"gene_{i}{'_foo' if (i%2) else ''}" for i in range(20, 40)]
+
+ ad1 = AnnData(X=X, var=pd.DataFrame(index=var_names_1))
+ ad2 = AnnData(X=X, var=pd.DataFrame(index=var_names_2))
+
+ ad1_dask = AnnData(X=X_dask, var=pd.DataFrame(index=var_names_1))
+ ad2_dask = AnnData(X=X_dask, var=pd.DataFrame(index=var_names_2))
+
+ res_in_memory = concat([ad1, ad2], join=join_type, merge=merge_strategy)
+ res_dask = concat([ad1_dask, ad2_dask], join=join_type, merge=merge_strategy)
+
+ assert_equal(res_in_memory, res_dask)
diff --git a/tests/test_concatenate_disk.py b/tests/test_concatenate_disk.py
index c2c9eb95a..a05d9a308 100644
--- a/tests/test_concatenate_disk.py
+++ b/tests/test_concatenate_disk.py
@@ -10,8 +10,8 @@
from anndata import AnnData, concat
from anndata._core.merge import _resolve_axis
-from anndata.experimental import read_elem, write_elem
from anndata.experimental.merge import as_group, concat_on_disk
+from anndata.io import read_elem, write_elem
from anndata.tests.helpers import (
assert_equal,
gen_adata,
diff --git a/tests/test_dask.py b/tests/test_dask.py
index 59616171b..21db60e7e 100644
--- a/tests/test_dask.py
+++ b/tests/test_dask.py
@@ -12,7 +12,6 @@
import anndata as ad
from anndata._core.anndata import AnnData
from anndata.compat import CupyArray, DaskArray
-from anndata.experimental import read_elem, write_elem
from anndata.experimental.merge import as_group
from anndata.tests.helpers import (
GEN_ADATA_DASK_ARGS,
@@ -123,10 +122,10 @@ def test_dask_distributed_write(adata, tmp_path, diskfmt):
orig = adata
if diskfmt == "h5ad":
with pytest.raises(ValueError, match=r"Cannot write dask arrays to hdf5"):
- write_elem(g, "", orig)
+ ad.io.write_elem(g, "", orig)
return
- write_elem(g, "", orig)
- curr = read_elem(g)
+ ad.io.write_elem(g, "", orig)
+ curr = ad.io.read_elem(g)
with pytest.raises(AssertionError):
assert_equal(curr.obsm["a"], curr.obsm["b"])
diff --git a/tests/test_deprecations.py b/tests/test_deprecations.py
index 7d4bdf6e2..b5cc44c29 100644
--- a/tests/test_deprecations.py
+++ b/tests/test_deprecations.py
@@ -11,12 +11,10 @@
import h5py
import numpy as np
import pytest
-import zarr
from scipy import sparse
-import anndata as ad
-from anndata import AnnData
-from anndata.experimental import CSRDataset, write_elem
+import anndata.experimental
+from anndata import AnnData, read
from anndata.tests.helpers import assert_equal
@@ -27,7 +25,7 @@ def adata():
obs=dict(obs_names=["s1", "s2"], anno1=["c1", "c2"]),
var=dict(var_names=["a", "b", "c"]),
)
- adata.raw = adata
+ adata.raw = adata.copy()
adata.layers["x2"] = adata.X * 2
adata.var["anno2"] = ["p1", "p2", "p3"]
adata.X = adata.X / 2
@@ -103,8 +101,8 @@ def test_dtype_warning():
def test_deprecated_write_attribute(tmp_path):
pth = tmp_path / "file.h5"
A = np.random.randn(20, 10)
- from anndata._io.specs import read_elem
from anndata._io.utils import read_attribute, write_attribute
+ from anndata.io import read_elem
with h5py.File(pth, "w") as f:
with pytest.warns(DeprecationWarning, match=r"write_elem"):
@@ -124,39 +122,26 @@ def test_deprecated_read(tmp_path):
memory.write_h5ad(tmp_path / "file.h5ad")
with pytest.warns(FutureWarning, match=r"`anndata.read` is deprecated"):
- from_disk = ad.read(tmp_path / "file.h5ad")
+ from_disk = read(tmp_path / "file.h5ad")
assert_equal(memory, from_disk)
-def test_deprecated_sparse_dataset_values():
- import zarr
-
- from anndata.experimental import sparse_dataset, write_elem
-
- mtx = sparse.random(50, 50, format="csr")
- g = zarr.group()
-
- write_elem(g, "mtx", mtx)
- mtx_backed = sparse_dataset(g["mtx"])
-
- with pytest.warns(FutureWarning, match=r"Please use .to_memory()"):
- mtx_backed.value
-
- with pytest.warns(FutureWarning, match=r"Please use .format"):
- mtx_backed.format_str
-
-
-def test_deprecated_sparse_dataset():
- from anndata._core.sparse_dataset import SparseDataset
-
- mem_X = sparse.random(50, 50, format="csr")
- g = zarr.group()
- write_elem(g, "X", mem_X)
- with pytest.warns(FutureWarning, match=r"SparseDataset is deprecated"):
- X = SparseDataset(g["X"])
-
- assert isinstance(X, CSRDataset)
-
- with pytest.warns(FutureWarning, match=r"SparseDataset is deprecated"):
- assert isinstance(X, SparseDataset)
+@pytest.mark.parametrize(
+ ("old_name", "new_name", "module"),
+ (
+ (old_name, new_name, module)
+ for module in [anndata, anndata.experimental]
+ for (old_name, new_name) in module._DEPRECATED.items()
+ ),
+)
+def test_warn_on_import_with_redirect(old_name: str, new_name: str, module):
+ with pytest.warns(FutureWarning, match=rf"Importing {old_name}.*is deprecated"):
+ getattr(module, old_name)
+
+
+def test_warn_on_deprecated__io_module():
+ with pytest.warns(
+ FutureWarning, match=r"Importing read_h5ad from `anndata._io` is deprecated"
+ ):
+ from anndata._io import read_h5ad # noqa
diff --git a/tests/test_get_vector.py b/tests/test_get_vector.py
index baf0fd7d6..87af324b0 100644
--- a/tests/test_get_vector.py
+++ b/tests/test_get_vector.py
@@ -36,7 +36,7 @@ def test_amgibuous_keys():
),
)
- adata.raw = adata
+ adata.raw = adata.copy()
for k in var_keys:
# These are mostly to check that the test is working
diff --git a/tests/test_gpu.py b/tests/test_gpu.py
index c6f49a696..8f3c4c250 100644
--- a/tests/test_gpu.py
+++ b/tests/test_gpu.py
@@ -24,7 +24,7 @@ def test_adata_raw_gpu():
adata = AnnData(
X=cupy_sparse.random(500, 50, density=0.01, format="csr", dtype=cp.float32)
)
- adata.raw = adata
+ adata.raw = adata.copy()
assert isinstance(adata.raw.X, sparse.csr_matrix)
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
index adf5a7dce..4645fedd5 100644
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@@ -18,6 +18,7 @@
BASE_MATRIX_PARAMS,
CUPY_MATRIX_PARAMS,
DASK_MATRIX_PARAMS,
+ DEFAULT_COL_TYPES,
as_cupy,
as_cupy_sparse_dask_array,
as_dense_cupy_dask_array,
@@ -26,6 +27,8 @@
assert_equal,
gen_adata,
gen_awkward,
+ gen_random_column,
+ issubdtype,
report_name,
)
from anndata.utils import axis_len
@@ -89,6 +92,18 @@ def test_gen_awkward(shape, datashape):
assert arr.type == arr_type
+@pytest.mark.parametrize("dtype", [*DEFAULT_COL_TYPES, pd.StringDtype])
+def test_gen_random_column(dtype):
+ _, col = gen_random_column(10, dtype)
+ assert len(col) == 10
+ # CategoricalDtypes are the only one specified as instances currently
+ if isinstance(dtype, pd.CategoricalDtype):
+ assert issubdtype(col.dtype, pd.CategoricalDtype)
+ assert col.dtype.ordered == dtype.ordered
+ else:
+ assert issubdtype(col.dtype, dtype)
+
+
# Does this work for every warning?
def test_report_name():
def raise_error():
diff --git a/tests/test_io_conversion.py b/tests/test_io_conversion.py
index 33f50b6d9..217a9cc16 100644
--- a/tests/test_io_conversion.py
+++ b/tests/test_io_conversion.py
@@ -39,7 +39,7 @@ def test_sparse_to_dense_disk(tmp_path, mtx_format, to_convert):
dense_from_mem_pth = tmp_path / "dense_mem.h5ad"
dense_from_disk_pth = tmp_path / "dense_disk.h5ad"
mem = gen_adata((50, 50), mtx_format)
- mem.raw = mem
+ mem.raw = mem.copy()
mem.write_h5ad(mem_pth)
disk = ad.read_h5ad(mem_pth, backed="r")
@@ -66,7 +66,7 @@ def test_sparse_to_dense_disk(tmp_path, mtx_format, to_convert):
def test_sparse_to_dense_inplace(tmp_path, spmtx_format):
pth = tmp_path / "adata.h5ad"
orig = gen_adata((50, 50), spmtx_format)
- orig.raw = orig
+ orig.raw = orig.copy()
orig.write(pth)
backed = ad.read_h5ad(pth, backed="r+")
backed.write(as_dense=("X", "raw/X"))
@@ -97,7 +97,7 @@ def test_sparse_to_dense_errors(tmp_path):
def test_dense_to_sparse_memory(tmp_path, spmtx_format, to_convert):
dense_path = tmp_path / "dense.h5ad"
orig = gen_adata((50, 50), np.array)
- orig.raw = orig
+ orig.raw = orig.copy()
orig.write_h5ad(dense_path)
assert not isinstance(orig.X, sparse.spmatrix)
assert not isinstance(orig.raw.X, sparse.spmatrix)
diff --git a/tests/test_io_dispatched.py b/tests/test_io_dispatched.py
index 4dba9b6aa..c0d60c18d 100644
--- a/tests/test_io_dispatched.py
+++ b/tests/test_io_dispatched.py
@@ -8,12 +8,7 @@
import anndata as ad
from anndata.compat import SpArray
-from anndata.experimental import (
- read_dispatched,
- read_elem,
- write_dispatched,
- write_elem,
-)
+from anndata.experimental import read_dispatched, write_dispatched
from anndata.tests.helpers import assert_equal, gen_adata
@@ -29,7 +24,7 @@ def read_only_axis_dfs(func, elem_name: str, elem, iospec):
adata = gen_adata((1000, 100))
z = zarr.group()
- write_elem(z, "/", adata)
+ ad.io.write_elem(z, "/", adata)
expected = ad.AnnData(obs=adata.obs, var=adata.var)
actual = read_dispatched(z, read_only_axis_dfs)
@@ -48,7 +43,7 @@ def read_as_dask_array(func, elem_name: str, elem, iospec):
"awkward-array",
}:
# Preventing recursing inside of these types
- return read_elem(elem)
+ return ad.io.read_elem(elem)
elif iospec.encoding_type == "array":
return da.from_zarr(elem)
else:
@@ -56,7 +51,7 @@ def read_as_dask_array(func, elem_name: str, elem, iospec):
adata = gen_adata((1000, 100))
z = zarr.group()
- write_elem(z, "/", adata)
+ ad.io.write_elem(z, "/", adata)
dask_adata = read_dispatched(z, read_as_dask_array)
@@ -64,7 +59,7 @@ def read_as_dask_array(func, elem_name: str, elem, iospec):
assert isinstance(dask_adata.obsm["array"], da.Array)
assert isinstance(dask_adata.uns["nested"]["nested_further"]["array"], da.Array)
- expected = read_elem(z)
+ expected = ad.io.read_elem(z)
actual = dask_adata.to_memory(copy=False)
assert_equal(expected, actual)
@@ -73,10 +68,10 @@ def read_as_dask_array(func, elem_name: str, elem, iospec):
def test_read_dispatched_null_case():
adata = gen_adata((100, 100))
z = zarr.group()
- write_elem(z, "/", adata)
+ ad.io.write_elem(z, "/", adata)
- expected = read_elem(z)
- actual = read_dispatched(z, lambda _, __, x, **___: read_elem(x))
+ expected = ad.io.read_elem(z)
+ actual = read_dispatched(z, lambda _, __, x, **___: ad.io.read_elem(x))
assert_equal(expected, actual)
@@ -101,7 +96,7 @@ def set_copy(d, **kwargs):
# TODO: Should the passed path be absolute?
path = "/" + store.path + "/" + k
if hasattr(elem, "shape") and not isinstance(
- elem, (sparse.spmatrix, SpArray, ad.AnnData)
+ elem, sparse.spmatrix | SpArray | ad.AnnData
):
if re.match(r"^/((X)|(layers)).*", path):
chunks = (M, N)
diff --git a/tests/test_io_elementwise.py b/tests/test_io_elementwise.py
index a4d614c6f..d93b9937c 100644
--- a/tests/test_io_elementwise.py
+++ b/tests/test_io_elementwise.py
@@ -20,12 +20,11 @@
_REGISTRY,
IOSpec,
get_spec,
- read_elem,
- read_elem_as_dask,
- write_elem,
)
from anndata._io.specs.registry import IORegistryError
-from anndata.compat import ZarrGroup, _read_attr
+from anndata.compat import CAN_USE_SPARSE_ARRAY, SpArray, ZarrGroup, _read_attr
+from anndata.experimental import read_elem_as_dask
+from anndata.io import read_elem, write_elem
from anndata.tests.helpers import (
as_cupy,
as_cupy_sparse_dask_array,
@@ -35,6 +34,7 @@
)
if TYPE_CHECKING:
+ from pathlib import Path
from typing import Literal, TypeVar
from anndata.compat import H5Group
@@ -138,15 +138,34 @@ def create_sparse_store(
id="sp_mat_csc",
),
pytest.param(pd.DataFrame({"a": [1, 2, 3]}), "dataframe", id="pd_df"),
- pytest.param(pd.Categorical(list("aabccedd")), "categorical", id="pd_cat"),
+ pytest.param(
+ pd.Categorical(list("aabccedd") + [pd.NA]),
+ "categorical",
+ id="pd_cat_np_str",
+ ),
pytest.param(
pd.Categorical(list("aabccedd"), ordered=True),
"categorical",
- id="pd_cat_ord",
+ id="pd_cat_np_str_ord",
+ ),
+ pytest.param(
+ pd.array(list("aabccedd") + [pd.NA], dtype="string").astype("category"),
+ "categorical",
+ id="pd_cat_pd_str",
),
pytest.param(
pd.Categorical([1, 2, 1, 3], ordered=True), "categorical", id="pd_cat_num"
),
+ pytest.param(
+ pd.array(["hello", "world"], dtype="string"),
+ "nullable-string-array",
+ id="pd_arr_str",
+ ),
+ pytest.param(
+ pd.array(["hello", "world", pd.NA], dtype="string"),
+ "nullable-string-array",
+ id="pd_arr_str_mask",
+ ),
pytest.param(
pd.arrays.IntegerArray(
np.ones(5, dtype=int), mask=np.array([True, False, True, False, True])
@@ -187,6 +206,8 @@ def create_sparse_store(
],
)
def test_io_spec(store, value, encoding_type):
+ ad.settings.allow_write_nullable_strings = True
+
key = f"key_for_{encoding_type}"
write_elem(store, key, value, dataset_kwargs={})
@@ -275,6 +296,8 @@ def test_read_lazy_2d_dask(sparse_format, store):
(2, (200, 400)),
(1, None),
(2, None),
+ (2, (400, -1)),
+ (2, (400, None)),
],
)
def test_read_lazy_subsets_nd_dask(store, n_dims, chunks):
@@ -307,28 +330,36 @@ def test_read_lazy_h5_cluster(sparse_format, tmp_path):
@pytest.mark.parametrize(
- ("arr_type", "chunks"),
+ ("arr_type", "chunks", "expected_chunksize"),
[
- ("dense", (100, 100)),
- ("csc", (SIZE, 10)),
- ("csr", (10, SIZE * 2)),
- ("csc", None),
- ("csr", None),
+ ("dense", (100, 100), (100, 100)),
+ ("csc", (SIZE, 10), (SIZE, 10)),
+ ("csr", (10, SIZE * 2), (10, SIZE * 2)),
+ ("csc", None, (SIZE, 1000)),
+ ("csr", None, (1000, SIZE * 2)),
+ ("csr", (10, -1), (10, SIZE * 2)),
+ ("csc", (-1, 10), (SIZE, 10)),
+ ("csr", (10, None), (10, SIZE * 2)),
+ ("csc", (None, 10), (SIZE, 10)),
+ ("csc", (None, None), (SIZE, SIZE * 2)),
+ ("csr", (None, None), (SIZE, SIZE * 2)),
+ ("csr", (-1, -1), (SIZE, SIZE * 2)),
+ ("csc", (-1, -1), (SIZE, SIZE * 2)),
],
)
-def test_read_lazy_2d_chunk_kwargs(store, arr_type, chunks):
+def test_read_lazy_2d_chunk_kwargs(
+ store: H5Group | ZarrGroup,
+ arr_type: Literal["csr", "csc", "dense"],
+ chunks: None | tuple[int | None, int | None],
+ expected_chunksize: tuple[int, int],
+):
if arr_type == "dense":
arr_store = create_dense_store(store)
X_dask_from_disk = read_elem_as_dask(arr_store["X"], chunks=chunks)
else:
arr_store = create_sparse_store(arr_type, store)
X_dask_from_disk = read_elem_as_dask(arr_store["X"], chunks=chunks)
- if chunks is not None:
- assert X_dask_from_disk.chunksize == chunks
- else:
- minor_index = int(arr_type == "csr")
- # assert that sparse chunks are set correctly by default
- assert X_dask_from_disk.chunksize[minor_index] == SIZE * (1 + minor_index)
+ assert X_dask_from_disk.chunksize == expected_chunksize
X_from_disk = read_elem(arr_store["X"])
assert_equal(X_from_disk, X_dask_from_disk)
@@ -365,7 +396,7 @@ def test_write_indptr_dtype_override(store, sparse_format):
def test_io_spec_raw(store):
adata = gen_adata((3, 2))
- adata.raw = adata
+ adata.raw = adata.copy()
write_elem(store, "adata", adata)
@@ -422,6 +453,11 @@ def test_write_io_error(store, obj):
assert re.search(full_pattern, msg)
+def test_write_nullable_string_error(store):
+ with pytest.raises(RuntimeError, match=r"allow_write_nullable_strings.*is False"):
+ write_elem(store, "/el", pd.array([""], dtype="string"))
+
+
def test_categorical_order_type(store):
# https://github.com/scverse/anndata/issues/853
cat = pd.Categorical([0, 1], ordered=True)
@@ -556,3 +592,22 @@ def test_io_pd_cow(store, copy_on_write):
write_elem(store, "adata", orig)
from_store = read_elem(store["adata"])
assert_equal(orig, from_store)
+
+
+def test_read_sparse_array(
+ tmp_path: Path,
+ sparse_format: Literal["csr", "csc"],
+ diskfmt: Literal["h5ad", "zarr"],
+):
+ path = tmp_path / f"test.{diskfmt.replace('ad', '')}"
+ a = sparse.random(100, 100, format=sparse_format)
+ if diskfmt == "zarr":
+ f = zarr.open_group(path, "a")
+ else:
+ f = h5py.File(path, "a")
+ ad.io.write_elem(f, "mtx", a)
+ if not CAN_USE_SPARSE_ARRAY:
+ pytest.skip("scipy.sparse.cs{r,c}array not available")
+ ad.settings.use_sparse_array_on_read = True
+ mtx = ad.io.read_elem(f["mtx"])
+ assert issubclass(type(mtx), SpArray)
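The nullable-string tests above suggest that writing a pandas `"string"` array requires opting in via the new setting, otherwise an error is raised. A hedged sketch of that opt-in (key name hypothetical):

```python
# Sketch: writing a nullable string array after enabling the setting.
import pandas as pd
import zarr

import anndata as ad

g = zarr.group()
ad.settings.allow_write_nullable_strings = True
ad.io.write_elem(g, "s", pd.array(["hello", "world", pd.NA], dtype="string"))
```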
diff --git a/tests/test_io_partial.py b/tests/test_io_partial.py
index d43aaca1c..76ff05627 100644
--- a/tests/test_io_partial.py
+++ b/tests/test_io_partial.py
@@ -1,5 +1,6 @@
from __future__ import annotations
+import warnings
from importlib.util import find_spec
from pathlib import Path
@@ -10,9 +11,8 @@
from scipy.sparse import csr_matrix
from anndata import AnnData
-from anndata._io import write_h5ad, write_zarr
-from anndata._io.specs import read_elem
from anndata._io.specs.registry import read_elem_partial
+from anndata.io import read_elem, write_h5ad, write_zarr
X = np.array([[1.0, 0.0, 3.0], [4.0, 0.0, 6.0], [0.0, 8.0, 0.0]], dtype="float32")
X_check = np.array([[4.0, 0.0], [0.0, 8.0]], dtype="float32")
@@ -44,7 +44,11 @@ def test_read_partial_X(tmp_path, typ, accessor):
@pytest.mark.skipif(not find_spec("scanpy"), reason="Scanpy is not installed")
@pytest.mark.parametrize("accessor", ["h5ad", "zarr"])
def test_read_partial_adata(tmp_path, accessor):
- import scanpy as sc
+ with warnings.catch_warnings():
+ warnings.filterwarnings(
+ "ignore", message=r"Importing read_.* from `anndata` is deprecated"
+ )
+ import scanpy as sc
adata = sc.datasets.pbmc68k_reduced()
diff --git a/tests/test_io_utils.py b/tests/test_io_utils.py
index f50249bad..25c66f46d 100644
--- a/tests/test_io_utils.py
+++ b/tests/test_io_utils.py
@@ -12,7 +12,6 @@
from anndata._io.specs.registry import IORegistryError
from anndata._io.utils import report_read_key_on_error
from anndata.compat import _clean_uns
-from anndata.experimental import read_elem, write_elem
if TYPE_CHECKING:
from collections.abc import Callable
@@ -108,10 +107,10 @@ class Foo:
pattern = r"(?s)^((?!Error raised while writing key '/?a').)*$"
with pytest.raises(IORegistryError, match=pattern):
- write_elem(group, "/", {"a": {"b": Foo()}})
+ ad.io.write_elem(group, "/", {"a": {"b": Foo()}})
- write_elem(group, "/", {"a": {"b": [1, 2, 3]}})
+ ad.io.write_elem(group, "/", {"a": {"b": [1, 2, 3]}})
group["a/b"].attrs["encoding-type"] = "not a real encoding type"
with pytest.raises(IORegistryError, match=pattern):
- read_elem(group)
+ ad.io.read_elem(group)
diff --git a/tests/test_io_warnings.py b/tests/test_io_warnings.py
index 29ab2d963..0e3848168 100644
--- a/tests/test_io_warnings.py
+++ b/tests/test_io_warnings.py
@@ -15,7 +15,11 @@
@pytest.mark.skipif(not find_spec("scanpy"), reason="Scanpy is not installed")
def test_old_format_warning_thrown():
- import scanpy as sc
+ with warnings.catch_warnings():
+ warnings.filterwarnings(
+ "ignore", message=r"Importing read_.* from `anndata` is deprecated"
+ )
+ import scanpy as sc
pth = Path(sc.datasets.__file__).parent / "10x_pbmc68k_reduced.h5ad"
# TODO: with Pytest 8, all this can be a
diff --git a/tests/test_obsmvarm.py b/tests/test_obsmvarm.py
index 91516de2f..d79c7bb5a 100644
--- a/tests/test_obsmvarm.py
+++ b/tests/test_obsmvarm.py
@@ -85,21 +85,21 @@ def test_setting_dataframe(adata: AnnData):
def test_setting_sparse(adata: AnnData):
- obsm_sparse = sparse.random(M, 100)
+ obsm_sparse = sparse.random(M, 100, format="csr")
adata.obsm["a"] = obsm_sparse
assert not np.any((adata.obsm["a"] != obsm_sparse).data)
- varm_sparse = sparse.random(N, 100)
+ varm_sparse = sparse.random(N, 100, format="csr")
adata.varm["a"] = varm_sparse
assert not np.any((adata.varm["a"] != varm_sparse).data)
h = joblib.hash(adata)
- bad_obsm_sparse = sparse.random(M * 2, M)
+ bad_obsm_sparse = sparse.random(M * 2, M, format="csr")
with pytest.raises(ValueError, match=r"incorrect shape"):
adata.obsm["b"] = bad_obsm_sparse
- bad_varm_sparse = sparse.random(N * 2, N)
+ bad_varm_sparse = sparse.random(N * 2, N, format="csr")
with pytest.raises(ValueError, match=r"incorrect shape"):
adata.varm["b"] = bad_varm_sparse
diff --git a/tests/test_obspvarp.py b/tests/test_obspvarp.py
index 311a8d2bb..42fc47172 100644
--- a/tests/test_obspvarp.py
+++ b/tests/test_obspvarp.py
@@ -65,21 +65,21 @@ def test_setting_ndarray(adata: AnnData):
def test_setting_sparse(adata: AnnData):
- obsp_sparse = sparse.random(M, M)
+ obsp_sparse = sparse.random(M, M, format="csr")
adata.obsp["a"] = obsp_sparse
assert not np.any((adata.obsp["a"] != obsp_sparse).data)
- varp_sparse = sparse.random(N, N)
+ varp_sparse = sparse.random(N, N, format="csr")
adata.varp["a"] = varp_sparse
assert not np.any((adata.varp["a"] != varp_sparse).data)
h = joblib.hash(adata)
- bad_obsp_sparse = sparse.random(M * 2, M)
+ bad_obsp_sparse = sparse.random(M * 2, M, format="csr")
with pytest.raises(ValueError, match=r"incorrect shape"):
adata.obsp["b"] = bad_obsp_sparse
- bad_varp_sparse = sparse.random(N * 2, N)
+ bad_varp_sparse = sparse.random(N * 2, N, format="csr")
with pytest.raises(ValueError, match=r"incorrect shape"):
adata.varp["b"] = bad_varp_sparse
diff --git a/tests/test_raw.py b/tests/test_raw.py
index a21f21f8a..d0ee86833 100644
--- a/tests/test_raw.py
+++ b/tests/test_raw.py
@@ -38,7 +38,7 @@ def adata_raw() -> ad.AnnData:
adata = ad.AnnData(
np.array(data, dtype="int32"), obs=obs_dict, var=var_dict, uns=uns_dict
)
- adata.raw = adata
+ adata.raw = adata.copy()
# Make them different shapes
adata = adata[:, [0, 1]].copy()
return adata
@@ -131,7 +131,7 @@ def test_raw_as_parent_view():
# https://github.com/scverse/anndata/issues/288
a = ad.AnnData(np.ones((4, 3)))
a.varm["PCs"] = np.ones((3, 3))
- a.raw = a
+ a.raw = a.copy()
# create a Raw containing views. This used to trigger #288.
b = a.raw[:, "0"]
# actualize
@@ -165,3 +165,10 @@ def test_to_adata_populates_obs():
from_raw = adata_w_raw.raw.to_adata()
assert_equal(adata, from_raw)
+
+
+def test_no_copy():
+ adata = gen_adata((20, 10), X_type=np.asarray)
+ adata.raw = adata  # no .copy() here
+ np.log1p(adata.X, out=adata.X)
+ assert adata.X is adata.raw.X
diff --git a/tests/test_readwrite.py b/tests/test_readwrite.py
index 8ad2870c4..518559995 100644
--- a/tests/test_readwrite.py
+++ b/tests/test_readwrite.py
@@ -3,6 +3,7 @@
import re
import warnings
from contextlib import contextmanager
+from functools import partial
from importlib.util import find_spec
from pathlib import Path
from string import ascii_letters
@@ -25,6 +26,7 @@
if TYPE_CHECKING:
from os import PathLike
+ from typing import Literal
HERE = Path(__file__).parent
@@ -133,7 +135,7 @@ def test_readwrite_kitchensink(tmp_path, storage, typ, backing_h5ad, dataset_kwa
X = typ(X_list)
adata_src = ad.AnnData(X, obs=obs_dict, var=var_dict, uns=uns_dict)
assert not isinstance(adata_src.obs["oanno1"].dtype, pd.CategoricalDtype)
- adata_src.raw = adata_src
+ adata_src.raw = adata_src.copy()
if storage == "h5ad":
adata_src.write(backing_h5ad, **dataset_kwargs)
@@ -161,16 +163,16 @@ def test_readwrite_kitchensink(tmp_path, storage, typ, backing_h5ad, dataset_kwa
if isinstance(adata_src.raw.X, SpArray):
assert isinstance(adata.raw.X, sparse.spmatrix)
else:
- assert isinstance(adata_src.raw.X, (type(adata.raw.X), DaskArray))
+ assert isinstance(adata_src.raw.X, type(adata.raw.X) | DaskArray)
assert isinstance(
- adata_src.uns["uns4"]["c"], (type(adata.uns["uns4"]["c"]), DaskArray)
+ adata_src.uns["uns4"]["c"], type(adata.uns["uns4"]["c"]) | DaskArray
)
- assert isinstance(adata_src.varm, (type(adata.varm), DaskArray))
+ assert isinstance(adata_src.varm, type(adata.varm) | DaskArray)
assert_equal(adata.raw.X, adata_src.raw.X)
pd.testing.assert_frame_equal(adata.raw.var, adata_src.raw.var)
- assert isinstance(adata.uns["uns4"]["a"], (int, np.integer))
- assert isinstance(adata_src.uns["uns4"]["a"], (int, np.integer))
+ assert isinstance(adata.uns["uns4"]["a"], int | np.integer)
+ assert isinstance(adata_src.uns["uns4"]["a"], int | np.integer)
assert_equal(adata, adata_src)
@@ -242,7 +244,7 @@ def test_readwrite_equivalent_h5ad_zarr(tmp_path, typ):
M, N = 100, 101
adata = gen_adata((M, N), X_type=typ)
- adata.raw = adata
+ adata.raw = adata.copy()
adata.write_h5ad(h5ad_pth)
adata.write_zarr(zarr_pth)
@@ -339,7 +341,7 @@ def test_zarr_compression(tmp_path):
compressor = Blosc(cname="zstd", clevel=3, shuffle=Blosc.BITSHUFFLE)
not_compressed = []
- ad._io.write_zarr(pth, adata, compressor=compressor)
+ ad.io.write_zarr(pth, adata, compressor=compressor)
def check_compressed(key, value):
if isinstance(value, zarr.Array) and value.shape != ():
@@ -405,7 +407,7 @@ def test_readwrite_loom(typ, obsm_mapping, varm_mapping, tmp_path):
)
adata_src.write_loom(tmp_path / "test.loom", write_obsm_varm=True)
- adata = ad.read_loom(
+ adata = ad.io.read_loom(
tmp_path / "test.loom",
sparse=typ is csr_matrix,
obsm_mapping=obsm_mapping,
@@ -455,37 +457,37 @@ def test_readloom_deprecations(tmp_path):
# obsm_names -> obsm_mapping
obsm_mapping = {"df": adata_src.obs.columns}
with pytest.warns(FutureWarning):
- depr_result = ad.read_loom(loom_pth, obsm_names=obsm_mapping)
- actual_result = ad.read_loom(loom_pth, obsm_mapping=obsm_mapping)
+ depr_result = ad.io.read_loom(loom_pth, obsm_names=obsm_mapping)
+ actual_result = ad.io.read_loom(loom_pth, obsm_mapping=obsm_mapping)
assert_equal(actual_result, depr_result)
with pytest.raises(ValueError, match=r"ambiguous"), pytest.warns(FutureWarning):
- ad.read_loom(loom_pth, obsm_mapping=obsm_mapping, obsm_names=obsm_mapping)
+ ad.io.read_loom(loom_pth, obsm_mapping=obsm_mapping, obsm_names=obsm_mapping)
# varm_names -> varm_mapping
varm_mapping = {"df": adata_src.var.columns}
with pytest.warns(FutureWarning):
- depr_result = ad.read_loom(loom_pth, varm_names=varm_mapping)
- actual_result = ad.read_loom(loom_pth, varm_mapping=varm_mapping)
+ depr_result = ad.io.read_loom(loom_pth, varm_names=varm_mapping)
+ actual_result = ad.io.read_loom(loom_pth, varm_mapping=varm_mapping)
assert_equal(actual_result, depr_result)
with pytest.raises(ValueError, match=r"ambiguous"), pytest.warns(FutureWarning):
- ad.read_loom(loom_pth, varm_mapping=varm_mapping, varm_names=varm_mapping)
+ ad.io.read_loom(loom_pth, varm_mapping=varm_mapping, varm_names=varm_mapping)
# positional -> keyword
with pytest.warns(FutureWarning, match=r"sparse"):
- depr_result = ad.read_loom(loom_pth, True)
- actual_result = ad.read_loom(loom_pth, sparse=True)
+ depr_result = ad.io.read_loom(loom_pth, True)
+ actual_result = ad.io.read_loom(loom_pth, sparse=True)
assert type(depr_result.X) == type(actual_result.X)
def test_read_csv():
- adata = ad.read_csv(HERE / "data" / "adata.csv")
+ adata = ad.io.read_csv(HERE / "data" / "adata.csv")
assert adata.obs_names.tolist() == ["r1", "r2", "r3"]
assert adata.var_names.tolist() == ["c1", "c2"]
assert adata.X.tolist() == X_list
def test_read_tsv_strpath():
- adata = ad.read_text(str(HERE / "data" / "adata-comments.tsv"), "\t")
+ adata = ad.io.read_text(str(HERE / "data" / "adata-comments.tsv"), "\t")
assert adata.obs_names.tolist() == ["r1", "r2", "r3"]
assert adata.var_names.tolist() == ["c1", "c2"]
assert adata.X.tolist() == X_list
@@ -493,7 +495,7 @@ def test_read_tsv_strpath():
def test_read_tsv_iter():
with (HERE / "data" / "adata-comments.tsv").open() as f:
- adata = ad.read_text(f, "\t")
+ adata = ad.io.read_text(f, "\t")
assert adata.obs_names.tolist() == ["r1", "r2", "r3"]
assert adata.var_names.tolist() == ["c1", "c2"]
assert adata.X.tolist() == X_list
@@ -541,14 +543,14 @@ def hash_dir_contents(dir: Path) -> dict[str, bytes]:
@pytest.mark.parametrize(
("read", "write", "name"),
[
- pytest.param(ad.read_h5ad, ad._io.write_h5ad, "test_empty.h5ad"),
+ pytest.param(ad.read_h5ad, ad.io.write_h5ad, "test_empty.h5ad"),
pytest.param(
- ad.read_loom,
- ad._io.write_loom,
+ ad.io.read_loom,
+ ad.io.write_loom,
"test_empty.loom",
marks=pytest.mark.xfail(reason="Loom can’t handle 0×0 matrices"),
),
- pytest.param(ad.read_zarr, ad._io.write_zarr, "test_empty.zarr"),
+ pytest.param(ad.read_zarr, ad.io.write_zarr, "test_empty.zarr"),
],
)
def test_readwrite_empty(read, write, name, tmp_path):
@@ -565,12 +567,12 @@ def test_read_excel():
message=r"datetime.datetime.utcnow\(\) is deprecated",
category=DeprecationWarning,
)
- adata = ad.read_excel(HERE / "data/excel.xlsx", "Sheet1", dtype=int)
+ adata = ad.io.read_excel(HERE / "data/excel.xlsx", "Sheet1", dtype=int)
assert adata.X.tolist() == X_list
def test_read_umi_tools():
- adata = ad.read_umi_tools(HERE / "data/umi_tools.tsv.gz")
+ adata = ad.io.read_umi_tools(HERE / "data/umi_tools.tsv.gz")
assert adata.obs_names.name == "cell"
assert adata.var_names.name == "gene"
assert adata.shape == (2, 13)
@@ -658,30 +660,13 @@ def random_cats(n):
assert_equal(orig, curr)
-def test_write_string_types(tmp_path, diskfmt):
- # https://github.com/scverse/anndata/issues/456
- adata_pth = tmp_path / f"adata.{diskfmt}"
-
- adata = ad.AnnData(
- obs=pd.DataFrame(
- np.ones((3, 2)),
- columns=["a", np.str_("b")],
- index=["a", "b", "c"],
- ),
- )
-
- write = getattr(adata, f"write_{diskfmt}")
- read = getattr(ad, f"read_{diskfmt}")
-
- write(adata_pth)
- from_disk = read(adata_pth)
-
- assert_equal(adata, from_disk)
-
+def test_write_string_type_error(tmp_path, diskfmt):
+ adata = ad.AnnData(obs=dict(obs_names=list("abc")))
adata.obs[b"c"] = np.zeros(3)
+
# This should error, and tell you which key is at fault
with pytest.raises(TypeError, match=r"writing key 'obs'") as exc_info:
- write(adata_pth)
+ getattr(adata, f"write_{diskfmt}")(tmp_path / f"adata.{diskfmt}")
assert "b'c'" in str(exc_info.value)
@@ -722,38 +707,68 @@ def test_zarr_chunk_X(tmp_path):
# Round-tripping scanpy datasets
################################
-diskfmt2 = diskfmt
+
+def _do_roundtrip(
+ adata: ad.AnnData, pth: Path, diskfmt: Literal["h5ad", "zarr"]
+) -> ad.AnnData:
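+ # write with the requested on-disk format, then read the result back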
+ getattr(adata, f"write_{diskfmt}")(pth)
+ return getattr(ad, f"read_{diskfmt}")(pth)
+
+
+@pytest.fixture
+def roundtrip(diskfmt):
+ return partial(_do_roundtrip, diskfmt=diskfmt)
+
+
+def test_write_string_types(tmp_path, diskfmt, roundtrip):
+ # https://github.com/scverse/anndata/issues/456
+ adata_pth = tmp_path / f"adata.{diskfmt}"
+
+ adata = ad.AnnData(
+ obs=pd.DataFrame(
+ np.ones((3, 2)),
+ columns=["a", np.str_("b")],
+ index=["a", "b", "c"],
+ ),
+ )
+
+ from_disk = roundtrip(adata, adata_pth)
+
+ assert_equal(adata, from_disk)
@pytest.mark.skipif(not find_spec("scanpy"), reason="Scanpy is not installed")
-def test_scanpy_pbmc68k(tmp_path, diskfmt, diskfmt2):
- read1 = lambda pth: getattr(ad, f"read_{diskfmt}")(pth)
- write1 = lambda adata, pth: getattr(adata, f"write_{diskfmt}")(pth)
- read2 = lambda pth: getattr(ad, f"read_{diskfmt2}")(pth)
- write2 = lambda adata, pth: getattr(adata, f"write_{diskfmt2}")(pth)
+def test_scanpy_pbmc68k(tmp_path, diskfmt, roundtrip, diskfmt2):
+ roundtrip2 = partial(_do_roundtrip, diskfmt=diskfmt2)
filepth1 = tmp_path / f"test1.{diskfmt}"
filepth2 = tmp_path / f"test2.{diskfmt2}"
- import scanpy as sc
+ with warnings.catch_warnings():
+ warnings.filterwarnings(
+ "ignore", message=r"Importing read_.* from `anndata` is deprecated"
+ )
+ import scanpy as sc
with warnings.catch_warnings():
warnings.simplefilter("ignore", ad.OldFormatWarning)
pbmc = sc.datasets.pbmc68k_reduced()
- write1(pbmc, filepth1)
- from_disk1 = read1(filepth1) # Do we read okay
- write2(from_disk1, filepth2) # Can we round trip
- from_disk2 = read2(filepth2)
+ from_disk1 = roundtrip(pbmc, filepth1) # Do we read okay
+ from_disk2 = roundtrip2(from_disk1, filepth2) # Can we round trip
assert_equal(pbmc, from_disk1) # Not expected to be exact due to `nan`s
assert_equal(pbmc, from_disk2)
@pytest.mark.skipif(not find_spec("scanpy"), reason="Scanpy is not installed")
-def test_scanpy_krumsiek11(tmp_path, diskfmt):
+def test_scanpy_krumsiek11(tmp_path, diskfmt, roundtrip):
filepth = tmp_path / f"test.{diskfmt}"
- import scanpy as sc
+ with warnings.catch_warnings():
+ warnings.filterwarnings(
+ "ignore", message=r"Importing read_.* from `anndata` is deprecated"
+ )
+ import scanpy as sc
# TODO: this should be fixed in scanpy instead
with pytest.warns(UserWarning, match=r"Observation names are not unique"):
@@ -761,11 +776,10 @@ def test_scanpy_krumsiek11(tmp_path, diskfmt):
del orig.uns["highlights"] # Can’t write int keys
# Can’t write "string" dtype: https://github.com/scverse/anndata/issues/679
orig.obs["cell_type"] = orig.obs["cell_type"].astype(str)
- getattr(orig, f"write_{diskfmt}")(filepth)
with pytest.warns(UserWarning, match=r"Observation names are not unique"):
- read = getattr(ad, f"read_{diskfmt}")(filepth)
+ curr = roundtrip(orig, filepth)
- assert_equal(orig, read, exact=True)
+ assert_equal(orig, curr, exact=True)
# Checking if we can read legacy zarr files
@@ -777,7 +791,11 @@ def test_scanpy_krumsiek11(tmp_path, diskfmt):
reason="File not present.",
)
def test_backwards_compat_zarr():
- import scanpy as sc
+ with warnings.catch_warnings():
+ warnings.filterwarnings(
+ "ignore", message=r"Importing read_.* from `anndata` is deprecated"
+ )
+ import scanpy as sc
import zarr
pbmc_orig = sc.datasets.pbmc68k_reduced()
@@ -796,11 +814,8 @@ def test_backwards_compat_zarr():
assert_equal(pbmc_zarr, pbmc_orig)
-# TODO: use diskfmt fixture once zarr backend implemented
-def test_adata_in_uns(tmp_path, diskfmt):
+def test_adata_in_uns(tmp_path, diskfmt, roundtrip):
pth = tmp_path / f"adatas_in_uns.{diskfmt}"
- read = lambda pth: getattr(ad, f"read_{diskfmt}")(pth)
- write = lambda adata, pth: getattr(adata, f"write_{diskfmt}")(pth)
orig = gen_adata((4, 5))
orig.uns["adatas"] = {
@@ -811,20 +826,16 @@ def test_adata_in_uns(tmp_path, diskfmt):
another_one.raw = gen_adata((2, 7))
orig.uns["adatas"]["b"].uns["another_one"] = another_one
- write(orig, pth)
- curr = read(pth)
+ curr = roundtrip(orig, pth)
assert_equal(orig, curr)
-def test_io_dtype(tmp_path, diskfmt, dtype):
+def test_io_dtype(tmp_path, diskfmt, dtype, roundtrip):
pth = tmp_path / f"adata_dtype.{diskfmt}"
- read = lambda pth: getattr(ad, f"read_{diskfmt}")(pth)
- write = lambda adata, pth: getattr(adata, f"write_{diskfmt}")(pth)
orig = ad.AnnData(np.ones((5, 8), dtype=dtype))
- write(orig, pth)
- curr = read(pth)
+ curr = roundtrip(orig, pth)
assert curr.X.dtype == dtype
diff --git a/tests/test_settings.py b/tests/test_settings.py
index 871141d92..ba7dba8f9 100644
--- a/tests/test_settings.py
+++ b/tests/test_settings.py
@@ -243,3 +243,16 @@ class TestEnum(Enum):
)
def test_describe(as_rst: bool, expected: str, settings: SettingsManager):
assert settings.describe("test_var_3", as_rst=as_rst) == expected
+
+
+def test_use_sparse_array_on_read():
+ import anndata as ad
+
+ if not ad.compat.CAN_USE_SPARSE_ARRAY:
+ with pytest.raises(
+ ValueError,
+ match=r"scipy.sparse.cs{r,c}array is not available in current scipy version",
+ ):
+ ad.settings.use_sparse_array_on_read = True
+ else:
+ ad.settings.use_sparse_array_on_read = True
diff --git a/tests/test_transpose.py b/tests/test_transpose.py
index 720733496..e672cf13d 100644
--- a/tests/test_transpose.py
+++ b/tests/test_transpose.py
@@ -24,7 +24,7 @@ def test_transpose_orig():
def _add_raw(adata, *, var_subset=slice(None)):
new = adata[:, var_subset].copy()
- new.raw = adata
+ new.raw = adata.copy()
return new
diff --git a/tests/test_views.py b/tests/test_views.py
index 2d4a0a78d..6e57e08c7 100644
--- a/tests/test_views.py
+++ b/tests/test_views.py
@@ -3,6 +3,7 @@
from contextlib import ExitStack
from copy import deepcopy
from operator import mul
+from typing import TYPE_CHECKING
import joblib
import numpy as np
@@ -35,6 +36,9 @@
)
from anndata.utils import asarray
+if TYPE_CHECKING:
+ from types import EllipsisType
+
IGNORE_SPARSE_EFFICIENCY_WARNING = pytest.mark.filterwarnings(
"ignore:Changing the sparsity structure:scipy.sparse.SparseEfficiencyWarning"
)
@@ -525,7 +529,7 @@ def test_layers_view():
# TODO: This can be flaky. Make that stop
def test_view_of_view(matrix_type, subset_func, subset_func2):
adata = gen_adata((30, 15), X_type=matrix_type)
- adata.raw = adata
+ adata.raw = adata.copy()
if subset_func is single_subset:
pytest.xfail("Other subset generating functions have trouble with this")
var_s1 = subset_func(adata.var_names, min_size=4)
@@ -786,6 +790,30 @@ def test_dataframe_view_index_setting():
assert a2.obs.index.values.tolist() == ["a", "b"]
+def test_ellipsis_index(
+ ellipsis_index: tuple[EllipsisType | slice, ...] | EllipsisType,
+ equivalent_ellipsis_index: tuple[slice, slice],
+ matrix_type,
+):
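+ # indexing with an Ellipsis should be equivalent to the expanded full-slice index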
+ adata = gen_adata((10, 10), X_type=matrix_type, **GEN_ADATA_DASK_ARGS)
+ subset_ellipsis = adata[ellipsis_index]
+ subset = adata[equivalent_ellipsis_index]
+ assert_equal(subset_ellipsis, subset)
+
+
+@pytest.mark.parametrize(
+ ("index", "expected_error"),
+ [
+ ((..., 0, ...), r"only have a single ellipsis"),
+ ((0, 0, 0), r"Received a length 3 index"),
+ ],
+ ids=["ellipsis-int-ellipsis", "int-int-int"],
+)
+def test_index_3d_errors(index: tuple[int | EllipsisType, ...], expected_error: str):
+ with pytest.raises(IndexError, match=expected_error):
+ gen_adata((10, 10))[index]
+
+
# @pytest.mark.parametrize("dim", ["obs", "var"])
# @pytest.mark.parametrize(
# ("idx", "pat"),
diff --git a/tests/test_x.py b/tests/test_x.py
index 2b4504158..64b1bb87d 100644
--- a/tests/test_x.py
+++ b/tests/test_x.py
@@ -182,3 +182,12 @@ def test_set_dense_x_view_from_sparse():
assert_equal(view.X, x1[:30])
assert_equal(orig.X[:30], x1[:30]) # change propagates through
assert_equal(orig.X[30:], x[30:]) # change propagates through
+
+
+def test_warn_on_non_csr_csc_matrix():
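+ # sparse.eye defaults to the DIA format, which is neither CSR nor CSC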
+ X = sparse.eye(100)
+ with pytest.warns(
+ FutureWarning,
+ match=rf"AnnData previously had undefined behavior around matrices of type {type(X)}.*",
+ ):
+ ad.AnnData(X=X)