Commit f72ec5a

Merge branch 'master' into as_subchunk_map_cython

crusaderky committed Aug 15, 2024
2 parents b98a541 + 05074a7
Showing 7 changed files with 67 additions and 20 deletions.
16 changes: 16 additions & 0 deletions .github/workflows/pre-commit.yml
@@ -0,0 +1,16 @@
+name: Linting
+
+on:
+  push:
+    branches: [master]
+  pull_request:
+    branches: ['*']
+
+jobs:
+  checks:
+    name: pre-commit hooks
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+      - uses: pre-commit/action@v3.0.1
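This new workflow runs the repository's pre-commit hooks on every push to master and on every pull request. The same checks can be reproduced locally before pushing; a minimal sketch, assuming pre-commit is installed in the development environment:

    import subprocess

    # Mirror the CI job: run every configured hook against the entire tree.
    subprocess.run(["pre-commit", "run", "--all-files"], check=True)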
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
@@ -11,7 +11,7 @@ exclude: |
   )
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.4.0
+    rev: v4.6.0
     hooks:
       - id: trailing-whitespace
       - id: end-of-file-fixer
@@ -27,25 +27,25 @@ repos:
       - id: mixed-line-ending

   - repo: https://github.com/pre-commit/mirrors-prettier
-    rev: v3.0.3
+    rev: v3.1.0
     hooks:
       - id: prettier
         files: 'docs/'
         types_or: [javascript, ts, tsx, html, css]

   # - repo: https://github.com/pre-commit/mirrors-mypy
-  #   rev: v1.7.0
+  #   rev: v1.11.1
   #   hooks:
   #     - id: mypy
   #       args: ['--follow-imports=skip', '--ignore-missing-imports']

   - repo: https://github.com/pycqa/isort
-    rev: 5.12.0
+    rev: 5.13.2
     hooks:
       - id: isort

   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.1.2
+    rev: v0.5.7
     hooks:
       # - id: ruff
       - id: ruff-format
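These rev bumps match what pre-commit's own updater produces; a sketch of scripting the same refresh, again assuming pre-commit is installed:

    import subprocess

    # Bump every hook's rev pin to its latest released tag, as this commit does.
    subprocess.run(["pre-commit", "autoupdate"], check=True)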
17 changes: 16 additions & 1 deletion pyproject.toml
@@ -90,7 +90,7 @@ ignore = [
     "W191", # Indentation contains tabs; unnecessary when running ruff-format
 ]

-[tool.ruff.per-file-ignores]
+[tool.ruff.lint.per-file-ignores]
 "__init__.py" = ["F401"]
 "test_*.py" = ["ANN001"]

@@ -114,3 +114,18 @@ markers = [
     "setup_args : kwargs for setup fixture.",
     "slow: slow tests",
 ]
+
+[tool.mypy]
+allow_incomplete_defs = true  # FIXME
+allow_untyped_decorators = false
+allow_untyped_defs = true  # FIXME
+ignore_missing_imports = true
+no_implicit_optional = true
+show_error_codes = true
+warn_redundant_casts = true
+warn_unused_ignores = true
+warn_unreachable = true
+
+[[tool.mypy.overrides]]
+module = ["*.tests.*"]
+allow_untyped_defs = true
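The new mypy configuration is strict about implicit Optional while, per the FIXME comments, still tolerating untyped function bodies for now. A minimal sketch of what that combination accepts and rejects (illustrative names, not from the repository):

    from typing import Optional

    # allow_untyped_defs = true: an untyped legacy helper still type-checks.
    def legacy_helper(x):
        return x + 1

    # no_implicit_optional = true: a None default needs an explicit Optional
    # annotation; `default: int = None` would be an error under this config.
    def lookup(key: str, default: Optional[int] = None) -> Optional[int]:
        return {"a": 1}.get(key, default)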
16 changes: 8 additions & 8 deletions versioned_hdf5/api.py
@@ -4,6 +4,7 @@
 Everything outside of this file is considered internal API and is subject to
 change.
 """
+
 import datetime
 import logging
 from contextlib import contextmanager
@@ -111,9 +112,8 @@ def __init__(self, f):
                 "Upgrading data_version to %d, no action required.",
                 DATA_VERSION,
             )
-            self.f["_version_data"]["versions"].attrs[
-                "data_version"
-            ] = DATA_VERSION
+            versions = self.f["_version_data"]["versions"]
+            versions.attrs["data_version"] = DATA_VERSION

         elif self.data_version_identifier > DATA_VERSION:
             raise ValueError(
@@ -152,6 +152,11 @@ def current_version(self):
         """
         return self._versions.attrs["current_version"]

+    @current_version.setter
+    def current_version(self, version_name):
+        set_current_version(self.f, version_name)
+        self._version_cache.clear()
+
     @property
     def data_version_identifier(self) -> str:
         """Return the data version identifier.
@@ -180,11 +185,6 @@ def data_version_identifier(self, version: int):
         """
         self.f["_version_data/versions"].attrs["data_version"] = version

-    @current_version.setter
-    def current_version(self, version_name):
-        set_current_version(self.f, version_name)
-        self._version_cache.clear()
-
     def get_version_by_name(self, version):
         if version.startswith("/"):
             raise ValueError(
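The moved setter now sits directly below its getter, the conventional property layout; functionally it routes writes through set_current_version and invalidates the version cache. A minimal sketch of the pattern, with hypothetical cache internals:

    class Example:
        """Sketch of a property whose setter must invalidate a cache."""

        def __init__(self):
            self._name = "r0"
            self._cache = {}  # hypothetical: per-version derived data

        @property
        def current_version(self):
            return self._name

        @current_version.setter
        def current_version(self, version_name):
            self._name = version_name
            self._cache.clear()  # cached lookups go stale once the head moves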
2 changes: 1 addition & 1 deletion versioned_hdf5/backend.py
@@ -320,7 +320,7 @@ def _verify_new_chunk_reuse(
         assert_array_equal(to_be_reused, to_be_written)
     except AssertionError as e:
         raise ValueError(
-            f"Hash {data_hash} of existing data chunk {reused_chunk} "
+            f"Hash {data_hash!r} of existing data chunk {reused_chunk} "
             f"matches the hash of new data chunk {chunk_being_written}, "
             "but data does not."
         ) from e
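The only change here is the !r conversion, which formats the hash with repr() so the error message shows a quoted, unambiguous value. A small illustration, assuming a string-like digest (hypothetical value):

    data_hash = "8f14e45fceea167a"        # hypothetical digest
    print(f"Hash {data_hash} matches")    # Hash 8f14e45fceea167a matches
    print(f"Hash {data_hash!r} matches")  # Hash '8f14e45fceea167a' matches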
4 changes: 3 additions & 1 deletion versioned_hdf5/hashtable.py
@@ -58,7 +58,9 @@ def __init__(self, f, name, *, chunk_size=None, hash_table_name="hash_table"):

         if hash_table_name in f["_version_data"][name]:
             self.hash_table_dataset = f["_version_data"][name][hash_table_name]
-            self.hash_table, self._indices = self._load_hashtable(self.hash_table_dataset)
+            self.hash_table, self._indices = self._load_hashtable(
+                self.hash_table_dataset
+            )
         else:
             self.hash_table_dataset = self._create_hashtable()
             self.hash_table = self.hash_table_dataset[:]
22 changes: 18 additions & 4 deletions versioned_hdf5/wrappers.py
@@ -20,7 +20,15 @@
 from h5py._hl.base import guess_dtype, phil, with_phil
 from h5py._hl.dataset import _LEGACY_GZIP_COMPRESSION_VALS
 from h5py._hl.selections import guess_shape
-from ndindex import ChunkSize, Slice, Tuple, ndindex, Integer, IntegerArray, BooleanArray
+from ndindex import (
+    BooleanArray,
+    ChunkSize,
+    Integer,
+    IntegerArray,
+    Slice,
+    Tuple,
+    ndindex,
+)

 from .backend import DEFAULT_CHUNK_SIZE
 from .slicetools import build_data_dict
@@ -742,7 +750,9 @@ def get_index(

         arr = np.ndarray(idx.newshape(self.shape), new_dtype, order="C")

-        for chunk, arr_idx_raw, index_raw in as_subchunk_map(self.chunks, idx, self.shape):
+        for chunk, arr_idx_raw, index_raw in as_subchunk_map(
+            self.chunks, idx, self.shape
+        ):
             if chunk not in self.id.data_dict:
                 self.id.data_dict[chunk] = np.broadcast_to(
                     self.fillvalue, chunk.newshape(self.shape)
@@ -1210,7 +1220,9 @@ def __getitem__(self, index):
         newshape = idx.newshape(self.shape)
         arr = np.full(newshape, self.fillvalue, dtype=self.dtype)

-        for c, arr_idx_raw, chunk_idx_raw in as_subchunk_map(self.chunks, idx, self.shape):
+        for c, arr_idx_raw, chunk_idx_raw in as_subchunk_map(
+            self.chunks, idx, self.shape
+        ):
             if c not in self.data_dict:
                 fill = np.broadcast_to(self.fillvalue, c.newshape(self.shape))
                 self.data_dict[c] = fill
@@ -1228,7 +1240,9 @@ def __setitem__(self, index, value):

         val = np.broadcast_to(value, idx.newshape(self.shape))

-        for c, val_idx_raw, chunk_idx_raw in as_subchunk_map(self.chunks, idx, self.shape):
+        for c, val_idx_raw, chunk_idx_raw in as_subchunk_map(
+            self.chunks, idx, self.shape
+        ):
             if c not in self.data_dict:
                 # Broadcasted arrays do not actually consume memory
                 fill = np.broadcast_to(self.fillvalue, c.newshape(self.shape))
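The repeated pattern in these hunks lazily materializes missing chunks as broadcast views of the fill value, which cost no memory until they are copied. A minimal sketch of that property in plain NumPy, independent of versioned_hdf5:

    import numpy as np

    # A broadcast view of a scalar: every element aliases the same 8 bytes.
    fill = np.broadcast_to(0.0, (1024, 1024))
    print(fill.strides)          # (0, 0) -> no per-element storage
    print(fill.flags.writeable)  # False  -> copy (np.array(fill)) before mutating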
