Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow read_metadata kwargs to be passed to FileSet.__init__ #79

Merged
merged 7 commits into from
Sep 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,4 @@ repos:
--non-interactive,
]
exclude: tests
additional_dependencies: [pytest]
additional_dependencies: [pytest, attrs, imageio]
7 changes: 4 additions & 3 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,12 @@
if os.getenv("_PYTEST_RAISE", "0") != "0":

@pytest.hookimpl(tryfirst=True)
def pytest_exception_interact(call: ty.Any) -> None:
raise call.excinfo.value
def pytest_exception_interact(call: pytest.CallInfo[ty.Any]) -> None:
if call.excinfo is not None:
raise call.excinfo.value

@pytest.hookimpl(tryfirst=True)
def pytest_internalerror(excinfo: ty.Any) -> None:
def pytest_internalerror(excinfo: pytest.ExceptionInfo[BaseException]) -> None:
raise excinfo.value


Expand Down
6 changes: 4 additions & 2 deletions extras/fileformats/extras/application/medical.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,13 @@

@extra_implementation(FileSet.read_metadata)
def dicom_read_metadata(
dicom: Dicom, selected_keys: ty.Optional[ty.Collection[str]] = None
dicom: Dicom,
specific_tags: ty.Optional[ty.Collection[str]] = None,
**kwargs: ty.Any,
) -> ty.Mapping[str, ty.Any]:
dcm = pydicom.dcmread(
dicom.fspath,
specific_tags=list(selected_keys if selected_keys is not None else []),
specific_tags=list(specific_tags if specific_tags is not None else []),
)
[getattr(dcm, a, None) for a in dir(dcm)] # Ensure all keywords are set
metadata = {
Expand Down
2 changes: 1 addition & 1 deletion extras/fileformats/extras/application/serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def convert_data_serialization(
output_path = out_dir / (
in_file.fspath.stem + (output_format.ext if output_format.ext else "")
)
return output_format.save_new(output_path, dct)
return output_format.new(output_path, dct)


@extra_implementation(DataSerialization.load)
Expand Down
4 changes: 2 additions & 2 deletions extras/fileformats/extras/image/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ def convert_image(
output_format: ty.Type[RasterImage],
out_dir: ty.Optional[Path] = None,
) -> RasterImage:
data_array = in_file.read_data()
data_array = in_file.load()
if out_dir is None:
out_dir = Path(tempfile.mkdtemp())
output_path = out_dir / (
in_file.fspath.stem + (output_format.ext if output_format.ext else "")
)
return output_format.save_new(output_path, data_array)
return output_format.new(output_path, data_array)
10 changes: 6 additions & 4 deletions extras/fileformats/extras/image/readwrite.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
import imageio
import numpy # noqa: F401
import typing # noqa: F401
from fileformats.core import extra_implementation
from fileformats.image.raster import RasterImage, DataArrayType


@extra_implementation(RasterImage.read_data)
@extra_implementation(RasterImage.load)
def read_raster_data(image: RasterImage) -> DataArrayType:
return imageio.imread(image.fspath) # type: ignore


@extra_implementation(RasterImage.write_data)
def write_raster_data(image: RasterImage, data_array: DataArrayType) -> None:
imageio.imwrite(image.fspath, data_array)
@extra_implementation(RasterImage.save)
def write_raster_data(image: RasterImage, data: DataArrayType) -> None:
imageio.imwrite(image.fspath, data)
21 changes: 2 additions & 19 deletions fileformats/application/serialization.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import json
import typing as ty
from fileformats.core.typing import Self, TypeAlias
from fileformats.core.typing import TypeAlias
from pathlib import Path
from fileformats.core import extra, DataType, FileSet, extra_implementation
from fileformats.core import DataType, FileSet, extra_implementation
from fileformats.core.mixin import WithClassifiers
from ..generic import File
from fileformats.core.exceptions import FormatMismatchError
Expand Down Expand Up @@ -44,23 +44,6 @@ class DataSerialization(WithClassifiers, File):

iana_mime: ty.Optional[str] = None

@extra
def load(self) -> LoadedSerialization:
"""Load the contents of the file into a dictionary"""
raise NotImplementedError

@extra
def save(self, data: LoadedSerialization) -> None:
"""Serialise a dictionary to a new file"""
raise NotImplementedError

@classmethod
def save_new(cls, fspath: ty.Union[str, Path], data: LoadedSerialization) -> Self:
# We have to use a mock object as the data file hasn't been written yet
mock = cls.mock(fspath)
mock.save(data)
return cls(fspath)


class Xml(DataSerialization):
ext: ty.Optional[str] = ".xml"
Expand Down
80 changes: 63 additions & 17 deletions fileformats/core/extras.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
try:
return dispatch_method(obj, *args, **kwargs)
except NotImplementedError:
msg = f"No implementation for '{method.__name__}' extra for {cls.__name__} types"
msg = f"No implementation for {method.__name__!r} extra for {cls.__name__} types"

Check warning on line 36 in fileformats/core/extras.py

View check run for this annotation

Codecov / codecov/patch

fileformats/core/extras.py#L36

Added line #L36 was not covered by tests
for xtra in extras:
if not xtra.imported:
try:
Expand Down Expand Up @@ -72,25 +72,56 @@
def decorator(implementation: ExtraImplementation) -> ExtraImplementation:
msig = inspect.signature(method)
fsig = inspect.signature(implementation)
msig_args = list(msig.parameters.values())[1:]
fsig_args = list(fsig.parameters.values())[1:]
differences = []

def type_match(a: ty.Union[str, type], b: ty.Union[str, type]) -> bool:
return isinstance(a, str) or isinstance(b, str) or a == b

differences = []
for i, (mparam, fparam) in enumerate(
zip_longest(
list(msig.parameters.values())[1:], list(fsig.parameters.values())[1:]
return (
a is ty.Any # type: ignore[comparison-overlap]
or a == b
or inspect.isclass(a)
and inspect.isclass(b)
and issubclass(b, a)
)
):

mhas_kwargs = msig_args and msig_args[-1].kind == inspect.Parameter.VAR_KEYWORD
fhas_kwargs = fsig_args and fsig_args[-1].kind == inspect.Parameter.VAR_KEYWORD
if mhas_kwargs:
mkwargs = msig_args.pop()
if fhas_kwargs:
fkwargs = fsig_args.pop()
mkwargs_type = mkwargs.annotation
fkwargs_type = fkwargs.annotation
if not type_match(mkwargs_type, fkwargs_type):
differences.append(

Check warning on line 97 in fileformats/core/extras.py

View check run for this annotation

Codecov / codecov/patch

fileformats/core/extras.py#L97

Added line #L97 was not covered by tests
f"Type of keyword args: {mkwargs_type!r} vs {fkwargs_type!r}"
)
if isinstance(mkwargs_type, str) and not isinstance(
fkwargs_type, str
):
differences.append(

Check warning on line 103 in fileformats/core/extras.py

View check run for this annotation

Codecov / codecov/patch

fileformats/core/extras.py#L103

Added line #L103 was not covered by tests
"Note that the type of keyword args is annotated using a "
"string so the implementing method also needs to be a "
f'string, i.e. "{mkwargs_type}" instead of {fkwargs_type}'
)
else:
differences.append("variable keywords vs non-variable keywords")

Check warning on line 109 in fileformats/core/extras.py

View check run for this annotation

Codecov / codecov/patch

fileformats/core/extras.py#L109

Added line #L109 was not covered by tests
elif fhas_kwargs:
differences.append("non-variable keywords vs variable keywords")
fsig_args.pop()

Check warning on line 112 in fileformats/core/extras.py

View check run for this annotation

Codecov / codecov/patch

fileformats/core/extras.py#L111-L112

Added lines #L111 - L112 were not covered by tests

for i, (mparam, fparam) in enumerate(zip_longest(msig_args, fsig_args)):
if mparam is None:
differences.append(
f"found additional argument, '{fparam.name}', at position {i}"
)
if not mhas_kwargs:
differences.append(
f"found additional argument, {fparam.name!r}, at position {i}"
)
continue
if fparam is None:
if mparam.default is mparam.empty:
differences.append(
f"override missing required argument '{mparam.name}'"
f"override missing required argument {mparam.name!r}"
)
continue
mname = mparam.name
Expand All @@ -99,18 +130,33 @@
ftype = fparam.annotation
if mname != fname:
differences.append(
f"name of parameter at position {i}: {mname} vs {fname}"
f"name of parameter at position {i}: {mname!r} vs {fname!r}"
)
elif not type_match(mtype, ftype):
differences.append(f"Type of '{mname}' arg: {mtype} vs {ftype}")
differences.append(f"Type of {mname!r} arg: {mtype!r} vs {ftype!r}")
if isinstance(mtype, str) and not isinstance(ftype, str):
differences.append(

Check warning on line 138 in fileformats/core/extras.py

View check run for this annotation

Codecov / codecov/patch

fileformats/core/extras.py#L138

Added line #L138 was not covered by tests
f"Note that the type of {mname!r} is annotated using a string so the "
"implementing method also needs to be a string, i.e. "
f'"{ftype}" instead of {ftype}'
)
if not type_match(msig.return_annotation, fsig.return_annotation):
differences.append(
f"return type: {msig.return_annotation} vs {fsig.return_annotation}"
f"return type: {msig.return_annotation!r} vs {fsig.return_annotation!r}"
)
if isinstance(msig.return_annotation, str) and not isinstance(
fsig.return_annotation, str
):
differences.append(

Check warning on line 150 in fileformats/core/extras.py

View check run for this annotation

Codecov / codecov/patch

fileformats/core/extras.py#L150

Added line #L150 was not covered by tests
"Note that the return type of is annotated using a string so the "
"implementing method also needs to be a string, i.e. "
f'"{fsig.return_annotation}" instead of {fsig.return_annotation}'
)

if differences:
raise TypeError(
f"Arguments differ between the signature of the "
f"decorated method {method} and the registered override {implementation}:\n"
f"Arguments differ between the signature of the extras hook method "
f"{method} and the implementing function {implementation}:\n"
+ "\n".join(differences)
)
dispatch_method.register(implementation)
Expand Down
75 changes: 61 additions & 14 deletions fileformats/core/fileset.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,14 @@ class FileSet(DataType):

Parameters
----------
fspaths : set[Path]
*fspaths : Path | str | FileSet | Collection[Path | str | FileSet]
a set of file-system paths pointing to all the resources in the file-set
metadata : dict[str, Any]
metadata associated with the file-set, typically lazily loaded via `read_metadata`
extra hook but can be provided directly at the time of instantiation
metadata_keys : list[str]
the keys of the metadata to load when the `metadata` property is called. Provided
**metadata_keys : ty.Any
Any keyword arguments to be passed through to the read_metadata function when
loading metadata to fill the `metadata` property. Provided
to allow for selective loading of metadata fields for performance reasons.
"""

Expand All @@ -91,18 +92,20 @@ class FileSet(DataType):
# Member attributes
fspaths: ty.FrozenSet[Path]
_explicit_metadata: ty.Optional[ty.Mapping[str, ty.Any]]
_metadata_keys: ty.Optional[ty.Collection[str]]
_metadata_kwargs: ty.Dict[str, ty.Any]

def __init__(
self,
fspaths: FspathsInputType,
*fspaths: FspathsInputType,
metadata: ty.Optional[ty.Dict[str, ty.Any]] = None,
metadata_keys: ty.Optional[ty.Collection[str]] = None,
**metadata_kwargs: ty.Any,
):
self._explicit_metadata = metadata
self._metadata_keys = metadata_keys
self._metadata_kwargs = metadata_kwargs
self._validate_class()
self.fspaths = fspaths_converter(fspaths)
self.fspaths = frozenset(
itertools.chain(*(fspaths_converter(p) for p in fspaths))
)
self._validate_fspaths()
self._additional_fspaths()
if metadata and not isinstance(metadata, dict):
Expand Down Expand Up @@ -175,6 +178,52 @@ def __hash__(self) -> int:
def __repr__(self) -> str:
return f"{self.type_name}('" + "', '".join(str(p) for p in self.fspaths) + "')"

@extra
def load(self) -> ty.Any:
"""Load the contents of the file into an object of type that make sense for the
datat type

Returns
-------
Any
the data loaded from the file in an type to the format
"""

@extra
def save(self, data: ty.Any) -> None:
"""Load new contents from a format-specific object

Parameters
----------
data: Any
the data to be saved to the file in a type that matches the one loaded by
the `load` method
"""

@classmethod
def new(cls, fspath: ty.Union[str, Path], data: ty.Any) -> Self:
"""Create a new file-set object with the given data saved to the file

Parameters
----------
fspath: str | Path
the file-system path to save the data to. Additional paths should be
able to be inferred from this path
data: Any
the data to be saved to the file in a type that matches the one loaded by
the `load` method

Returns
-------
FileSet
a new file-set object with the given data saved to the file
"""
# We have to use a mock object as the data file hasn't been written yet so can't
# be validated
mock = cls.mock(fspath)
mock.save(data)
return cls(fspath)

@property
def parent(self) -> Path:
"A common parent directory for all the top-level paths in the file-set"
Expand Down Expand Up @@ -250,7 +299,7 @@ def metadata(self) -> ty.Mapping[str, ty.Any]:
if self._explicit_metadata is not None:
return self._explicit_metadata
try:
metadata = self.read_metadata(selected_keys=self._metadata_keys)
metadata = self.read_metadata(**self._metadata_kwargs)
except FileFormatsExtrasPkgUninstalledError:
raise
except FileFormatsExtrasPkgNotCheckedError as e:
Expand All @@ -261,15 +310,13 @@ def metadata(self) -> ty.Mapping[str, ty.Any]:
return metadata

@extra
def read_metadata(
self, selected_keys: ty.Optional[ty.Collection[str]] = None
) -> ty.Mapping[str, ty.Any]:
def read_metadata(self, **kwargs: ty.Any) -> ty.Mapping[str, ty.Any]:
"""Reads any metadata associated with the fileset and returns it as a dict

Parameters
----------
selected_keys : Sequence[str], optional
selected keys to load instead of loading the complete metadata set
**kwargs : Any
any format-specific keyword arguments to pass to the metadata reader

Returns
-------
Expand Down
26 changes: 12 additions & 14 deletions fileformats/core/mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,12 +183,8 @@
def header(self) -> "fileformats.core.FileSet":
return self.header_type(self.select_by_ext(self.header_type)) # type: ignore[attr-defined]

def read_metadata(
self, selected_keys: ty.Optional[ty.Collection[str]] = None
) -> ty.Mapping[str, ty.Any]:
header: ty.Dict[str, ty.Any] = self.header.load() # type: ignore[attr-defined]
if selected_keys:
header = {k: v for k, v in header.items() if k in selected_keys}
def read_metadata(self, **kwargs: ty.Any) -> ty.Mapping[str, ty.Any]:
header: ty.Dict[str, ty.Any] = self.header.load()
return header


Expand Down Expand Up @@ -221,18 +217,20 @@
def side_cars(self) -> ty.Tuple["fileformats.core.FileSet", ...]:
return tuple(tp(self.select_by_ext(tp)) for tp in self.side_car_types) # type: ignore[attr-defined]

def read_metadata(
self, selected_keys: ty.Optional[ty.Collection[str]] = None
) -> ty.Mapping[str, ty.Any]:
metadata: ty.Dict[str, ty.Any] = dict(self.primary_type.read_metadata(self, selected_keys=selected_keys)) # type: ignore[arg-type]
def read_metadata(self, **kwargs: ty.Any) -> ty.Mapping[str, ty.Any]:
metadata: ty.Dict[str, ty.Any] = dict(self.primary_type.read_metadata(self, **kwargs)) # type: ignore[arg-type]
for side_car in self.side_cars:
try:
side_car_metadata: ty.Dict[str, ty.Any] = side_car.load() # type: ignore[attr-defined]
side_car_metadata: ty.Dict[str, ty.Any] = side_car.load()
except AttributeError:
continue
else:
side_car_class_name: str = to_mime_format_name(type(side_car).__name__)
metadata[side_car_class_name] = side_car_metadata
if not isinstance(side_car_metadata, dict):
raise TypeError(

Check warning on line 228 in fileformats/core/mixin.py

View check run for this annotation

Codecov / codecov/patch

fileformats/core/mixin.py#L228

Added line #L228 was not covered by tests
f"`load` method of side-car type {type(side_car)} must return a "
f"dictionary, not {type(side_car_metadata)!r}"
)
side_car_class_name: str = to_mime_format_name(type(side_car).__name__)
metadata[side_car_class_name] = side_car_metadata
return metadata

@classproperty
Expand Down
Loading
Loading