Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove partial loading for numpy arrays #716

Merged
merged 3 commits into from
Aug 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 25 additions & 21 deletions eolearn/core/eodata_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,15 @@ def _yield_savers(
yield feature_saver


def _get_feature_io_constructor(ftype: FeatureType, use_zarr: bool) -> type[FeatureIO]:
"""Creates the correct FeatureIO, corresponding to the FeatureType."""
if ftype is FeatureType.META_INFO:
return FeatureIOJson
if ftype.is_vector():
return FeatureIOGeoDf
return FeatureIOZarr if use_zarr else FeatureIONumpy # type: ignore[return-value] # not sure why
mlubej marked this conversation as resolved.
Show resolved Hide resolved


def _remove_redundant_files(
filesystem: FS,
new_files: list[str],
Expand Down Expand Up @@ -361,11 +370,23 @@ def _load_features(
def _get_feature_io(
ftype: FeatureType, path: str, filesystem: FS, temporal_selection: None | slice | list[int] | list[bool]
) -> FeatureIO:
if ftype is FeatureType.META_INFO:
return FeatureIOJson(path, filesystem)
if ftype.is_vector():
return FeatureIOGeoDf(path, filesystem)

use_zarr = path.endswith(FeatureIOZarr.get_file_extension())
constructor = _get_feature_io_constructor(ftype, use_zarr)

if ftype.is_temporal():
return constructor(path, filesystem, temporal_selection) # type: ignore[call-arg]
return constructor(path, filesystem)
if use_zarr:
return FeatureIOZarr(path, filesystem, temporal_selection)
if temporal_selection is None:
return FeatureIONumpy(path, filesystem)
raise IOError(
f"Cannot perform loading with temporal selection for numpy data at {path}. Resave feature with"
" `use_zarr=True` to enable loading with temporal selections."
)
return (FeatureIOZarr if use_zarr else FeatureIONumpy)(path, filesystem)
mlubej marked this conversation as resolved.
Show resolved Hide resolved


def get_filesystem_data_info(
Expand Down Expand Up @@ -595,16 +616,12 @@ def _write_to_file(cls, data: T, file: BinaryIO | gzip.GzipFile, path: str) -> N
class FeatureIONumpy(FeatureIOGZip[np.ndarray]):
"""FeatureIO object specialized for Numpy arrays."""

def __init__(self, path: str, filesystem: FS, temporal_selection: None | slice | list[int] | list[bool] = None):
self.temporal_selection = slice(None) if temporal_selection is None else temporal_selection
super().__init__(path, filesystem)

@classmethod
def _get_uncompressed_file_extension(cls) -> str:
return ".npy"

def _read_from_file(self, file: BinaryIO | gzip.GzipFile) -> np.ndarray:
return np.load(file, allow_pickle=True)[self.temporal_selection, ...]
return np.load(file, allow_pickle=True)

@classmethod
def _write_to_file(cls, data: np.ndarray, file: BinaryIO | gzip.GzipFile, _: str) -> None:
Expand All @@ -614,10 +631,6 @@ def _write_to_file(cls, data: np.ndarray, file: BinaryIO | gzip.GzipFile, _: str
class FeatureIOGeoDf(FeatureIOGZip[gpd.GeoDataFrame]):
"""FeatureIO object specialized for GeoDataFrames."""

def __init__(self, path: str, filesystem: FS, temporal_selection: None | slice | list[int] | list[bool] = None):
self.temporal_selection = temporal_selection # temporal selection currently does nothing
super().__init__(path, filesystem)

@classmethod
def _get_uncompressed_file_extension(cls) -> str:
return ".gpkg"
Expand Down Expand Up @@ -784,12 +797,3 @@ def _better_jsonify(param: object) -> Any:
if isinstance(param, Mapping):
return dict(param.items())
raise TypeError(f"Object of type {type(param)} is not yet supported in jsonify utility function")


def _get_feature_io_constructor(ftype: FeatureType, use_zarr: bool) -> type[FeatureIO]:
"""Creates the correct FeatureIO, corresponding to the FeatureType."""
if ftype is FeatureType.META_INFO:
return FeatureIOJson
if ftype.is_vector():
return FeatureIOGeoDf
return FeatureIOZarr if use_zarr else FeatureIONumpy
43 changes: 21 additions & 22 deletions tests/core/test_eodata_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import json
import os
import sys
import tempfile
import warnings
from typing import Any

Expand Down Expand Up @@ -91,11 +90,6 @@ def eopatch_fixture():
return eopatch


def test_saving_to_a_file(eopatch):
with tempfile.NamedTemporaryFile() as fp, pytest.raises(CreateFailed):
eopatch.save(fp.name)


@mock_s3
@pytest.mark.parametrize("fs_loader", FS_LOADERS)
@pytest.mark.parametrize("use_zarr", [True, False])
Expand Down Expand Up @@ -221,11 +215,9 @@ def test_save_add_only_features(eopatch, fs_loader, use_zarr: bool):

@mock_s3
@pytest.mark.parametrize("fs_loader", FS_LOADERS)
@pytest.mark.parametrize("use_zarr", [True, False])
def test_bbox_always_saved(eopatch, fs_loader, use_zarr: bool):
_skip_when_appropriate(fs_loader, use_zarr)
def test_bbox_always_saved(eopatch, fs_loader):
with fs_loader() as temp_fs:
eopatch.save("/", filesystem=temp_fs, features=[FeatureType.DATA], use_zarr=use_zarr)
eopatch.save("/", filesystem=temp_fs, features=[FeatureType.DATA])
assert temp_fs.exists("/bbox.geojson")


Expand Down Expand Up @@ -340,12 +332,10 @@ def test_save_and_load_tasks(eopatch, fs_loader, use_zarr: bool):

@mock_s3
@pytest.mark.parametrize("fs_loader", FS_LOADERS)
@pytest.mark.parametrize("use_zarr", [True, False])
def test_fail_saving_nonexistent_feature(eopatch, fs_loader, use_zarr: bool):
_skip_when_appropriate(fs_loader, use_zarr)
def test_fail_saving_nonexistent_feature(eopatch, fs_loader):
features = [(FeatureType.DATA, "nonexistent")]
with fs_loader() as temp_fs, pytest.raises(ValueError):
eopatch.save("/", filesystem=temp_fs, features=features, use_zarr=use_zarr)
eopatch.save("/", filesystem=temp_fs, features=features)


@mock_s3
Expand Down Expand Up @@ -570,15 +560,14 @@ def test_zarr_and_numpy_combined_loading(eopatch):

@mock_s3
@pytest.mark.parametrize("fs_loader", FS_LOADERS)
@pytest.mark.parametrize("use_zarr", [True, False])
@pytest.mark.parametrize(
"temporal_selection",
[None, slice(None, 3), slice(2, 4, 2), [3, 4], lambda ts: [i % 2 == 0 for i, _ in enumerate(ts)]],
)
def test_partial_temporal_loading(fs_loader: type[FS], eopatch: EOPatch, use_zarr: bool, temporal_selection):
_skip_when_appropriate(fs_loader, use_zarr)
def test_partial_temporal_loading(fs_loader: type[FS], eopatch: EOPatch, temporal_selection):
_skip_when_appropriate(fs_loader, True)
with fs_loader() as temp_fs:
eopatch.save(path="patch-folder", filesystem=temp_fs, use_zarr=use_zarr)
eopatch.save(path="patch-folder", filesystem=temp_fs, use_zarr=True)

full_patch = EOPatch.load(path="patch-folder", filesystem=temp_fs)
partial_patch = EOPatch.load(path="patch-folder", filesystem=temp_fs, temporal_selection=temporal_selection)
Expand All @@ -599,12 +588,22 @@ def test_partial_temporal_loading(fs_loader: type[FS], eopatch: EOPatch, use_zar

@mock_s3
@pytest.mark.parametrize("fs_loader", FS_LOADERS)
@pytest.mark.parametrize("use_zarr", [True, False])
def test_partial_temporal_loading_fails_for_numpy(fs_loader: type[FS], eopatch: EOPatch):
_skip_when_appropriate(fs_loader, True)
with fs_loader() as temp_fs:
eopatch.save(path="patch-folder", filesystem=temp_fs, use_zarr=False)

with pytest.raises(IOError):
EOPatch.load(path="patch-folder", filesystem=temp_fs, temporal_selection=[0])


@mock_s3
@pytest.mark.parametrize("fs_loader", FS_LOADERS)
@pytest.mark.parametrize("temporal_selection", [[3, 4, 10]])
def test_partial_temporal_loading_fails(fs_loader: type[FS], eopatch: EOPatch, use_zarr: bool, temporal_selection):
_skip_when_appropriate(fs_loader, use_zarr)
def test_partial_temporal_loading_fails_bad_selection(fs_loader: type[FS], eopatch: EOPatch, temporal_selection):
_skip_when_appropriate(fs_loader, True)
with fs_loader() as temp_fs:
eopatch.save(path="patch-folder", filesystem=temp_fs, use_zarr=use_zarr)
eopatch.save(path="patch-folder", filesystem=temp_fs, use_zarr=True)

with pytest.raises(IndexError):
EOPatch.load(path="patch-folder", filesystem=temp_fs, temporal_selection=temporal_selection)
Expand Down
5 changes: 3 additions & 2 deletions tests/core/test_eodata_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,8 @@ def test_lazy_loading(test_eopatch_path):

def test_temporally_independent_merge(test_eopatch_path):
full_patch = EOPatch.load(test_eopatch_path)
part1 = EOPatch.load(test_eopatch_path, temporal_selection=slice(None, 10))
part2 = EOPatch.load(test_eopatch_path, temporal_selection=slice(10, None))
part1, part2 = full_patch.copy(deep=True), full_patch.copy(deep=True)
part1.consolidate_timestamps(full_patch.get_timestamps()[:10])
part2.consolidate_timestamps(full_patch.get_timestamps()[10:])

assert full_patch == merge_eopatches(part1, part2, time_dependent_op="concatenate")