diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ffb5c036..37407f24 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,3 +25,10 @@ repos: rev: 2024.08.19 hooks: - id: sp-repo-review + + - repo: https://github.com/pre-commit/mirrors-mypy + rev: 'v1.4.0' + hooks: + - id: mypy + args: [--config-file, pyproject.toml] + additional_dependencies: [numpy, pytest, zfpy, 'zarr==3.0.0b1'] diff --git a/docs/conf.py b/docs/conf.py index 800be7be..8e6adca6 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -12,7 +12,6 @@ # All configuration values have a default; values that are commented out # serve to show the default. - import os import sys from unittest.mock import Mock as MagicMock @@ -232,7 +231,7 @@ def __getattr__(cls, name): # -- Options for LaTeX output --------------------------------------------- -latex_elements = { +latex_elements: dict[str, str] = { # The paper size ('letterpaper' or 'a4paper'). #'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). diff --git a/docs/release.rst b/docs/release.rst index e87d7078..456b1e85 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -26,6 +26,8 @@ Enhancements By :user:`Norman Rzepka <normanrz>`, :issue:`613`. * Add codec wrappers for Zarr 3. By :user:`Norman Rzepka <normanrz>`, :issue:`524` +* Added mypy type checking to continuous integration. + By :user:`David Stansby <dstansby>`, :issue:`460`.
Maintenance ~~~~~~~~~~~ diff --git a/numcodecs/abc.py b/numcodecs/abc.py index d7328be4..9fdfd27e 100644 --- a/numcodecs/abc.py +++ b/numcodecs/abc.py @@ -29,13 +29,14 @@ """ from abc import ABC, abstractmethod +from typing import Optional class Codec(ABC): """Codec abstract base class.""" # override in sub-class - codec_id = None + codec_id: Optional[str] = None """Codec identifier.""" @abstractmethod diff --git a/numcodecs/checksum32.py b/numcodecs/checksum32.py index 6246ab1c..d5f6a8da 100644 --- a/numcodecs/checksum32.py +++ b/numcodecs/checksum32.py @@ -1,6 +1,7 @@ import struct import zlib -from typing import Literal +from collections.abc import Callable +from typing import TYPE_CHECKING, Literal import numpy as np @@ -8,12 +9,15 @@ from .compat import ensure_contiguous_ndarray, ndarray_copy from .jenkins import jenkins_lookup3 +if TYPE_CHECKING: + from typing_extensions import Buffer + CHECKSUM_LOCATION = Literal['start', 'end'] class Checksum32(Codec): # override in sub-class - checksum = None + checksum: Callable[["Buffer", int], int] | None = None location: CHECKSUM_LOCATION = 'start' def __init__(self, location: CHECKSUM_LOCATION | None = None): diff --git a/numcodecs/compat.py b/numcodecs/compat.py index bbda3a46..d1844e10 100644 --- a/numcodecs/compat.py +++ b/numcodecs/compat.py @@ -100,7 +100,7 @@ def ensure_contiguous_ndarray_like(buf, max_buffer_size=None, flatten=True) -> N # check for datetime or timedelta ndarray, the buffer interface doesn't support those if arr.dtype.kind in "Mm": - arr = arr.view(np.int64) + arr = arr.view(np.int64) # type: ignore[arg-type] # check memory is contiguous, if so flatten if arr.flags.c_contiguous or arr.flags.f_contiguous: @@ -117,7 +117,7 @@ def ensure_contiguous_ndarray_like(buf, max_buffer_size=None, flatten=True) -> N return arr -def ensure_contiguous_ndarray(buf, max_buffer_size=None, flatten=True) -> np.array: +def ensure_contiguous_ndarray(buf, max_buffer_size=None, flatten=True) -> np.ndarray: 
"""Convenience function to coerce `buf` to a numpy array, if it is not already a numpy array. Also ensures that the returned value exports fully contiguous memory, and supports the new-style buffer interface. If the optional max_buffer_size is diff --git a/numcodecs/lzma.py b/numcodecs/lzma.py index cec66c22..32fb528b 100644 --- a/numcodecs/lzma.py +++ b/numcodecs/lzma.py @@ -1,11 +1,14 @@ -import contextlib +from types import ModuleType +from typing import Optional -_lzma = None +_lzma: Optional[ModuleType] = None try: import lzma as _lzma except ImportError: # pragma: no cover - with contextlib.suppress(ImportError): - from backports import lzma as _lzma + try: + from backports import lzma as _lzma # type: ignore[no-redef] + except ImportError: + pass if _lzma: diff --git a/numcodecs/ndarray_like.py b/numcodecs/ndarray_like.py index 70035a5c..f70d7b45 100644 --- a/numcodecs/ndarray_like.py +++ b/numcodecs/ndarray_like.py @@ -1,7 +1,7 @@ from typing import Any, ClassVar, Optional, Protocol, runtime_checkable -class _CachedProtocolMeta(Protocol.__class__): +class _CachedProtocolMeta(Protocol.__class__): # type: ignore[name-defined] """Custom implementation of @runtime_checkable The native implementation of @runtime_checkable is slow, diff --git a/numcodecs/registry.py b/numcodecs/registry.py index 045ef0b8..c923e7f0 100644 --- a/numcodecs/registry.py +++ b/numcodecs/registry.py @@ -2,11 +2,13 @@ applications to dynamically register and look-up codec classes.""" import logging -from importlib.metadata import entry_points +from importlib.metadata import EntryPoints, entry_points + +from numcodecs.abc import Codec logger = logging.getLogger("numcodecs") -codec_registry = {} -entries = {} +codec_registry: dict[str, Codec] = {} +entries: dict[str, "EntryPoints"] = {} def run_entrypoints(): diff --git a/numcodecs/tests/test_lzma.py b/numcodecs/tests/test_lzma.py index d93aecfd..5e05110c 100644 --- a/numcodecs/tests/test_lzma.py +++ b/numcodecs/tests/test_lzma.py @@ -1,5 
+1,7 @@ import itertools import unittest +from types import ModuleType +from typing import cast import numpy as np import pytest @@ -20,6 +22,8 @@ check_repr, ) +_lzma = cast(ModuleType, _lzma) + codecs = [ LZMA(), LZMA(preset=1), diff --git a/numcodecs/tests/test_zarr3.py b/numcodecs/tests/test_zarr3.py index ec1a398e..afb77136 100644 --- a/numcodecs/tests/test_zarr3.py +++ b/numcodecs/tests/test_zarr3.py @@ -1,11 +1,19 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np import pytest -zarr = pytest.importorskip("zarr") +if not TYPE_CHECKING: + zarr = pytest.importorskip("zarr") +else: + import zarr + +import zarr.storage +from zarr.core.common import JSON -import numcodecs.zarr3 # noqa: E402 +import numcodecs.zarr3 pytestmark = [ pytest.mark.skipif(zarr.__version__ < "3.0.0", reason="zarr 3.0.0 or later is required"), @@ -17,7 +25,6 @@ get_codec_class = zarr.registry.get_codec_class Array = zarr.Array -JSON = zarr.core.common.JSON BytesCodec = zarr.codecs.BytesCodec Store = zarr.abc.store.Store MemoryStore = zarr.storage.MemoryStore @@ -28,7 +35,7 @@ @pytest.fixture -def store() -> Store: +def store() -> StorePath: return StorePath(MemoryStore(mode="w")) @@ -43,6 +50,8 @@ def test_entry_points(codec_class: type[numcodecs.zarr3._NumcodecsCodec]): @pytest.mark.parametrize("codec_class", ALL_CODECS) def test_docstring(codec_class: type[numcodecs.zarr3._NumcodecsCodec]): + if codec_class.__doc__ is None: + pytest.skip() assert "See :class:`numcodecs." 
in codec_class.__doc__ @@ -59,7 +68,7 @@ def test_docstring(codec_class: type[numcodecs.zarr3._NumcodecsCodec]): numcodecs.zarr3.Shuffle, ], ) -def test_generic_codec_class(store: Store, codec_class: type[numcodecs.zarr3._NumcodecsCodec]): +def test_generic_codec_class(store: StorePath, codec_class: type[numcodecs.zarr3._NumcodecsCodec]): data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR): @@ -92,7 +101,9 @@ def test_generic_codec_class(store: Store, codec_class: type[numcodecs.zarr3._Nu ], ) def test_generic_filter( - store: Store, codec_class: type[numcodecs.zarr3._NumcodecsCodec], codec_config: dict[str, JSON] + store: StorePath, + codec_class: type[numcodecs.zarr3._NumcodecsCodec], + codec_config: dict[str, JSON], ): data = np.linspace(0, 10, 256, dtype="float32").reshape((16, 16)) @@ -114,7 +125,7 @@ def test_generic_filter( np.testing.assert_array_equal(data, a[:, :]) -def test_generic_filter_bitround(store: Store): +def test_generic_filter_bitround(store: StorePath): data = np.linspace(0, 1, 256, dtype="float32").reshape((16, 16)) with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR): @@ -132,7 +143,7 @@ def test_generic_filter_bitround(store: Store): assert np.allclose(data, a[:, :], atol=0.1) -def test_generic_filter_quantize(store: Store): +def test_generic_filter_quantize(store: StorePath): data = np.linspace(0, 10, 256, dtype="float32").reshape((16, 16)) with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR): @@ -150,7 +161,7 @@ def test_generic_filter_quantize(store: Store): assert np.allclose(data, a[:, :], atol=0.001) -def test_generic_filter_packbits(store: Store): +def test_generic_filter_packbits(store: StorePath): data = np.zeros((16, 16), dtype="bool") data[0:4, :] = True @@ -189,7 +200,7 @@ def test_generic_filter_packbits(store: Store): numcodecs.zarr3.JenkinsLookup3, ], ) -def test_generic_checksum(store: Store, codec_class: type[numcodecs.zarr3._NumcodecsCodec]): 
+def test_generic_checksum(store: StorePath, codec_class: type[numcodecs.zarr3._NumcodecsCodec]): data = np.linspace(0, 10, 256, dtype="float32").reshape((16, 16)) with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR): @@ -208,7 +219,7 @@ def test_generic_checksum(store: Store, codec_class: type[numcodecs.zarr3._Numco @pytest.mark.parametrize("codec_class", [numcodecs.zarr3.PCodec, numcodecs.zarr3.ZFPY]) -def test_generic_bytes_codec(store: Store, codec_class: type[numcodecs.zarr3._NumcodecsCodec]): +def test_generic_bytes_codec(store: StorePath, codec_class: type[numcodecs.zarr3._NumcodecsCodec]): try: codec_class()._codec # noqa: B018 except ValueError as e: diff --git a/numcodecs/tests/test_zfpy.py b/numcodecs/tests/test_zfpy.py index dce3c4d0..e48c3cb7 100644 --- a/numcodecs/tests/test_zfpy.py +++ b/numcodecs/tests/test_zfpy.py @@ -1,3 +1,6 @@ +from types import ModuleType +from typing import cast + import numpy as np import pytest @@ -17,6 +20,9 @@ check_repr, ) +_zfpy = cast(ModuleType, _zfpy) + + codecs = [ ZFPY(mode=_zfpy.mode_fixed_rate, rate=-1), ZFPY(), diff --git a/numcodecs/zarr3.py b/numcodecs/zarr3.py index 811ab501..fb579839 100644 --- a/numcodecs/zarr3.py +++ b/numcodecs/zarr3.py @@ -26,7 +26,6 @@ import asyncio import math -from collections.abc import Callable from dataclasses import dataclass, replace from functools import cached_property, partial from typing import Any, Self, TypeVar @@ -45,6 +44,7 @@ raise ImportError("zarr 3.0.0 or later is required to use the numcodecs zarr integration.") from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec +from zarr.abc.metadata import Metadata from zarr.core.array_spec import ArraySpec from zarr.core.buffer import Buffer, BufferPrototype, NDBuffer from zarr.core.buffer.cpu import as_numpy_array_wrapper @@ -72,11 +72,11 @@ def _parse_codec_configuration(data: dict[str, JSON]) -> dict[str, JSON]: @dataclass(frozen=True) -class _NumcodecsCodec: +class _NumcodecsCodec(Metadata): 
codec_name: str codec_config: dict[str, JSON] - def __init__(self, **codec_config: dict[str, JSON]) -> None: + def __init__(self, **codec_config: JSON) -> None: if not self.codec_name: raise ValueError( "The codec name needs to be supplied through the `codec_name` attribute." @@ -106,7 +106,7 @@ def from_dict(cls, data: dict[str, JSON]) -> Self: codec_config = _parse_codec_configuration(data) return cls(**codec_config) - def to_dict(self) -> JSON: + def to_dict(self) -> dict[str, JSON]: codec_config = self.codec_config.copy() return { "name": self.codec_name, @@ -118,7 +118,7 @@ def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> class _NumcodecsBytesBytesCodec(_NumcodecsCodec, BytesBytesCodec): - def __init__(self, **codec_config: dict[str, JSON]) -> None: + def __init__(self, **codec_config: JSON) -> None: super().__init__(**codec_config) async def _decode_single(self, chunk_bytes: Buffer, chunk_spec: ArraySpec) -> Buffer: @@ -140,7 +140,7 @@ async def _encode_single(self, chunk_bytes: Buffer, chunk_spec: ArraySpec) -> Bu class _NumcodecsArrayArrayCodec(_NumcodecsCodec, ArrayArrayCodec): - def __init__(self, **codec_config: dict[str, JSON]) -> None: + def __init__(self, **codec_config: JSON) -> None: super().__init__(**codec_config) async def _decode_single(self, chunk_array: NDBuffer, chunk_spec: ArraySpec) -> NDBuffer: @@ -155,7 +155,7 @@ async def _encode_single(self, chunk_array: NDBuffer, chunk_spec: ArraySpec) -> class _NumcodecsArrayBytesCodec(_NumcodecsCodec, ArrayBytesCodec): - def __init__(self, **codec_config: dict[str, JSON]) -> None: + def __init__(self, **codec_config: JSON) -> None: super().__init__(**codec_config) async def _decode_single(self, chunk_buffer: Buffer, chunk_spec: ArraySpec) -> NDBuffer: @@ -179,7 +179,7 @@ def _add_docstring(cls: type[T], ref_class_name: str) -> type[T]: return cls -def _add_docstring_wrapper(ref_class_name: str) -> Callable[[type[T]], type[T]]: +def 
_add_docstring_wrapper(ref_class_name: str) -> partial: return partial(_add_docstring, ref_class_name=ref_class_name) @@ -190,7 +190,7 @@ def _make_bytes_bytes_codec(codec_name: str, cls_name: str) -> type[_NumcodecsBy class _Codec(_NumcodecsBytesBytesCodec): codec_name = _codec_name - def __init__(self, **codec_config: dict[str, JSON]) -> None: + def __init__(self, **codec_config: JSON) -> None: super().__init__(**codec_config) _Codec.__name__ = cls_name @@ -204,7 +204,7 @@ def _make_array_array_codec(codec_name: str, cls_name: str) -> type[_NumcodecsAr class _Codec(_NumcodecsArrayArrayCodec): codec_name = _codec_name - def __init__(self, **codec_config: dict[str, JSON]) -> None: + def __init__(self, **codec_config: JSON) -> None: super().__init__(**codec_config) _Codec.__name__ = cls_name @@ -218,7 +218,7 @@ def _make_array_bytes_codec(codec_name: str, cls_name: str) -> type[_NumcodecsAr class _Codec(_NumcodecsArrayBytesCodec): codec_name = _codec_name - def __init__(self, **codec_config: dict[str, JSON]) -> None: + def __init__(self, **codec_config: JSON) -> None: super().__init__(**codec_config) _Codec.__name__ = cls_name @@ -232,7 +232,7 @@ def _make_checksum_codec(codec_name: str, cls_name: str) -> type[_NumcodecsBytes class _ChecksumCodec(_NumcodecsBytesBytesCodec): codec_name = _codec_name - def __init__(self, **codec_config: dict[str, JSON]) -> None: + def __init__(self, **codec_config: JSON) -> None: super().__init__(**codec_config) def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int: @@ -256,10 +256,10 @@ def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> class Shuffle(_NumcodecsBytesBytesCodec): codec_name = f"{CODEC_PREFIX}shuffle" - def __init__(self, **codec_config: dict[str, JSON]) -> None: + def __init__(self, **codec_config: JSON) -> None: super().__init__(**codec_config) - def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: + def evolve_from_array_spec(self, array_spec: 
ArraySpec) -> Shuffle: if array_spec.dtype.itemsize != self.codec_config.get("elementsize"): return Shuffle(**{**self.codec_config, "elementsize": array_spec.dtype.itemsize}) return self # pragma: no cover @@ -276,15 +276,15 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: class FixedScaleOffset(_NumcodecsArrayArrayCodec): codec_name = f"{CODEC_PREFIX}fixedscaleoffset" - def __init__(self, **codec_config: dict[str, JSON]) -> None: + def __init__(self, **codec_config: JSON) -> None: super().__init__(**codec_config) def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: if astype := self.codec_config.get("astype"): - return replace(chunk_spec, dtype=np.dtype(astype)) + return replace(chunk_spec, dtype=np.dtype(astype)) # type: ignore[arg-type] return chunk_spec - def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: + def evolve_from_array_spec(self, array_spec: ArraySpec) -> FixedScaleOffset: if str(array_spec.dtype) != self.codec_config.get("dtype"): return FixedScaleOffset(**{**self.codec_config, "dtype": str(array_spec.dtype)}) return self @@ -294,10 +294,10 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: class Quantize(_NumcodecsArrayArrayCodec): codec_name = f"{CODEC_PREFIX}quantize" - def __init__(self, **codec_config: dict[str, JSON]) -> None: + def __init__(self, **codec_config: JSON) -> None: super().__init__(**codec_config) - def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: + def evolve_from_array_spec(self, array_spec: ArraySpec) -> Quantize: if str(array_spec.dtype) != self.codec_config.get("dtype"): return Quantize(**{**self.codec_config, "dtype": str(array_spec.dtype)}) return self @@ -307,7 +307,7 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: class PackBits(_NumcodecsArrayArrayCodec): codec_name = f"{CODEC_PREFIX}packbits" - def __init__(self, **codec_config: dict[str, JSON]) -> None: + def __init__(self, **codec_config: JSON) -> None: 
super().__init__(**codec_config) def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: @@ -326,13 +326,13 @@ def validate(self, *, dtype: np.dtype[Any], **_kwargs) -> None: class AsType(_NumcodecsArrayArrayCodec): codec_name = f"{CODEC_PREFIX}astype" - def __init__(self, **codec_config: dict[str, JSON]) -> None: + def __init__(self, **codec_config: JSON) -> None: super().__init__(**codec_config) def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: - return replace(chunk_spec, dtype=np.dtype(self.codec_config["encode_dtype"])) + return replace(chunk_spec, dtype=np.dtype(self.codec_config["encode_dtype"])) # type: ignore[arg-type] - def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: + def evolve_from_array_spec(self, array_spec: ArraySpec) -> AsType: decode_dtype = self.codec_config.get("decode_dtype") if str(array_spec.dtype) != decode_dtype: return AsType(**{**self.codec_config, "decode_dtype": str(array_spec.dtype)}) diff --git a/numcodecs/zfpy.py b/numcodecs/zfpy.py index d0f9773d..eda4bfc2 100644 --- a/numcodecs/zfpy.py +++ b/numcodecs/zfpy.py @@ -1,8 +1,10 @@ import warnings from contextlib import suppress from importlib.metadata import PackageNotFoundError, version +from types import ModuleType +from typing import Optional -_zfpy = None +_zfpy: Optional[ModuleType] = None _zfpy_version: tuple = () with suppress(PackageNotFoundError): @@ -21,7 +23,7 @@ ) else: with suppress(ImportError): - import zfpy as _zfpy + import zfpy as _zfpy # type: ignore[no-redef] if _zfpy: import numpy as np diff --git a/pyproject.toml b/pyproject.toml index 5a1701f1..3b97c5cc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -200,3 +200,11 @@ ignore = [ [tool.ruff.format] quote-style = "preserve" + +[tool.mypy] +ignore_errors = false +ignore_missing_imports = true +enable_error_code = ["ignore-without-code", "redundant-expr", "truthy-bool"] +# TODO: set options below to true +strict = false +warn_unreachable = false