Commit f72ec5a

Merge branch 'master' into as_subchunk_map_cython

crusaderky committed Aug 15, 2024
2 parents b98a541 + 05074a7
Showing 7 changed files with 67 additions and 20 deletions.
16 changes: 16 additions & 0 deletions .github/workflows/pre-commit.yml
@@ -0,0 +1,16 @@
+name: Linting
+
+on:
+  push:
+    branches: [master]
+  pull_request:
+    branches: ['*']
+
+jobs:
+  checks:
+    name: pre-commit hooks
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+      - uses: pre-commit/action@v3.0.1
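This new workflow runs the repository's pre-commit hooks on every push to master and on every pull request. The same checks can be reproduced locally before pushing; a minimal sketch, assuming pre-commit is installed in the development environment:

    import subprocess

    # Mirror the CI job: run every configured hook against the entire tree.
    subprocess.run(["pre-commit", "run", "--all-files"], check=True)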
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
@@ -11,7 +11,7 @@ exclude: |
   )
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.4.0
+    rev: v4.6.0
     hooks:
       - id: trailing-whitespace
       - id: end-of-file-fixer
@@ -27,25 +27,25 @@ repos:
       - id: mixed-line-ending

   - repo: https://github.com/pre-commit/mirrors-prettier
-    rev: v3.0.3
+    rev: v3.1.0
     hooks:
       - id: prettier
         files: 'docs/'
         types_or: [javascript, ts, tsx, html, css]

   # - repo: https://github.com/pre-commit/mirrors-mypy
-  #   rev: v1.7.0
+  #   rev: v1.11.1
   #   hooks:
   #     - id: mypy
   #       args: ['--follow-imports=skip', '--ignore-missing-imports']

   - repo: https://github.com/pycqa/isort
-    rev: 5.12.0
+    rev: 5.13.2
     hooks:
       - id: isort

   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.1.2
+    rev: v0.5.7
     hooks:
       # - id: ruff
       - id: ruff-format
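These rev bumps match what pre-commit's own updater produces; a sketch of scripting the same refresh, again assuming pre-commit is installed:

    import subprocess

    # Bump every hook's rev pin to its latest released tag, as this commit does.
    subprocess.run(["pre-commit", "autoupdate"], check=True)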
17 changes: 16 additions & 1 deletion pyproject.toml
@@ -90,7 +90,7 @@ ignore = [
     "W191", # Indentation contains tabs; unnecessary when running ruff-format
 ]

-[tool.ruff.per-file-ignores]
+[tool.ruff.lint.per-file-ignores]
 "__init__.py" = ["F401"]
 "test_*.py" = ["ANN001"]

@@ -114,3 +114,18 @@ markers = [
     "setup_args : kwargs for setup fixture.",
     "slow: slow tests",
 ]
+
+[tool.mypy]
+allow_incomplete_defs = true  # FIXME
+allow_untyped_decorators = false
+allow_untyped_defs = true  # FIXME
+ignore_missing_imports = true
+no_implicit_optional = true
+show_error_codes = true
+warn_redundant_casts = true
+warn_unused_ignores = true
+warn_unreachable = true
+
+[[tool.mypy.overrides]]
+module = ["*.tests.*"]
+allow_untyped_defs = true
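The new mypy configuration is strict about implicit Optional while, per the FIXME comments, still tolerating untyped function bodies for now. A minimal sketch of what that combination accepts and rejects (illustrative names, not from the repository):

    from typing import Optional

    # allow_untyped_defs = true: an untyped legacy helper still type-checks.
    def legacy_helper(x):
        return x + 1

    # no_implicit_optional = true: a None default needs an explicit Optional
    # annotation; `default: int = None` would be an error under this config.
    def lookup(key: str, default: Optional[int] = None) -> Optional[int]:
        return {"a": 1}.get(key, default)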
16 changes: 8 additions & 8 deletions versioned_hdf5/api.py
@@ -4,6 +4,7 @@
 Everything outside of this file is considered internal API and is subject to
 change.
 """
+
 import datetime
 import logging
 from contextlib import contextmanager
@@ -111,9 +112,8 @@ def __init__(self, f):
                 "Upgrading data_version to %d, no action required.",
                 DATA_VERSION,
             )
-            self.f["_version_data"]["versions"].attrs[
-                "data_version"
-            ] = DATA_VERSION
+            versions = self.f["_version_data"]["versions"]
+            versions.attrs["data_version"] = DATA_VERSION

         elif self.data_version_identifier > DATA_VERSION:
             raise ValueError(
@@ -152,6 +152,11 @@ def current_version(self):
         """
         return self._versions.attrs["current_version"]

+    @current_version.setter
+    def current_version(self, version_name):
+        set_current_version(self.f, version_name)
+        self._version_cache.clear()
+
     @property
     def data_version_identifier(self) -> str:
         """Return the data version identifier.
@@ -180,11 +185,6 @@ def data_version_identifier(self, version: int):
         """
         self.f["_version_data/versions"].attrs["data_version"] = version

-    @current_version.setter
-    def current_version(self, version_name):
-        set_current_version(self.f, version_name)
-        self._version_cache.clear()
-
     def get_version_by_name(self, version):
         if version.startswith("/"):
             raise ValueError(
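The moved setter now sits directly below its getter, the conventional property layout; functionally it routes writes through set_current_version and invalidates the version cache. A minimal sketch of the pattern, with hypothetical cache internals:

    class Example:
        """Sketch of a property whose setter must invalidate a cache."""

        def __init__(self):
            self._name = "r0"
            self._cache = {}  # hypothetical: per-version derived data

        @property
        def current_version(self):
            return self._name

        @current_version.setter
        def current_version(self, version_name):
            self._name = version_name
            self._cache.clear()  # cached lookups go stale once the head moves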
2 changes: 1 addition & 1 deletion versioned_hdf5/backend.py
@@ -320,7 +320,7 @@ def _verify_new_chunk_reuse(
         assert_array_equal(to_be_reused, to_be_written)
     except AssertionError as e:
         raise ValueError(
-            f"Hash {data_hash} of existing data chunk {reused_chunk} "
+            f"Hash {data_hash!r} of existing data chunk {reused_chunk} "
             f"matches the hash of new data chunk {chunk_being_written}, "
             "but data does not."
         ) from e
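The only change here is the !r conversion, which formats the hash with repr() so the error message shows a quoted, unambiguous value. A small illustration, assuming a string-like digest (hypothetical value):

    data_hash = "8f14e45fceea167a"        # hypothetical digest
    print(f"Hash {data_hash} matches")    # Hash 8f14e45fceea167a matches
    print(f"Hash {data_hash!r} matches")  # Hash '8f14e45fceea167a' matches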
4 changes: 3 additions & 1 deletion versioned_hdf5/hashtable.py
@@ -58,7 +58,9 @@ def __init__(self, f, name, *, chunk_size=None, hash_table_name="hash_table"):

         if hash_table_name in f["_version_data"][name]:
             self.hash_table_dataset = f["_version_data"][name][hash_table_name]
-            self.hash_table, self._indices = self._load_hashtable(self.hash_table_dataset)
+            self.hash_table, self._indices = self._load_hashtable(
+                self.hash_table_dataset
+            )
         else:
             self.hash_table_dataset = self._create_hashtable()
             self.hash_table = self.hash_table_dataset[:]
22 changes: 18 additions & 4 deletions versioned_hdf5/wrappers.py
@@ -20,7 +20,15 @@
 from h5py._hl.base import guess_dtype, phil, with_phil
 from h5py._hl.dataset import _LEGACY_GZIP_COMPRESSION_VALS
 from h5py._hl.selections import guess_shape
-from ndindex import ChunkSize, Slice, Tuple, ndindex, Integer, IntegerArray, BooleanArray
+from ndindex import (
+    BooleanArray,
+    ChunkSize,
+    Integer,
+    IntegerArray,
+    Slice,
+    Tuple,
+    ndindex,
+)

 from .backend import DEFAULT_CHUNK_SIZE
 from .slicetools import build_data_dict
@@ -742,7 +750,9 @@ def get_index(

         arr = np.ndarray(idx.newshape(self.shape), new_dtype, order="C")

-        for chunk, arr_idx_raw, index_raw in as_subchunk_map(self.chunks, idx, self.shape):
+        for chunk, arr_idx_raw, index_raw in as_subchunk_map(
+            self.chunks, idx, self.shape
+        ):
             if chunk not in self.id.data_dict:
                 self.id.data_dict[chunk] = np.broadcast_to(
                     self.fillvalue, chunk.newshape(self.shape)
@@ -1210,7 +1220,9 @@ def __getitem__(self, index):
         newshape = idx.newshape(self.shape)
         arr = np.full(newshape, self.fillvalue, dtype=self.dtype)

-        for c, arr_idx_raw, chunk_idx_raw in as_subchunk_map(self.chunks, idx, self.shape):
+        for c, arr_idx_raw, chunk_idx_raw in as_subchunk_map(
+            self.chunks, idx, self.shape
+        ):
             if c not in self.data_dict:
                 fill = np.broadcast_to(self.fillvalue, c.newshape(self.shape))
                 self.data_dict[c] = fill
@@ -1228,7 +1240,9 @@ def __setitem__(self, index, value):

         val = np.broadcast_to(value, idx.newshape(self.shape))

-        for c, val_idx_raw, chunk_idx_raw in as_subchunk_map(self.chunks, idx, self.shape):
+        for c, val_idx_raw, chunk_idx_raw in as_subchunk_map(
+            self.chunks, idx, self.shape
+        ):
             if c not in self.data_dict:
                 # Broadcasted arrays do not actually consume memory
                 fill = np.broadcast_to(self.fillvalue, c.newshape(self.shape))
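The repeated pattern in these hunks lazily materializes missing chunks as broadcast views of the fill value, which cost no memory until they are copied. A minimal sketch of that property in plain NumPy, independent of versioned_hdf5:

    import numpy as np

    # A broadcast view of a scalar: every element aliases the same 8 bytes.
    fill = np.broadcast_to(0.0, (1024, 1024))
    print(fill.strides)          # (0, 0) -> no per-element storage
    print(fill.flags.writeable)  # False  -> copy (np.array(fill)) before mutating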
