Merge pull request #20 from TGSAI/enh/file_exists_error
Nicer errors for file operations (exists, missing, can't read)
tasansal authored Aug 26, 2022
2 parents f2d7238 + 4e6a4c2 commit 509fdcb
Showing 8 changed files with 102 additions and 47 deletions.
28 changes: 9 additions & 19 deletions src/mdio/api/accessor.py
@@ -3,9 +3,6 @@

from __future__ import annotations

from warnings import simplefilter
from warnings import warn

import dask.array as da
import numpy as np
import numpy.typing as npt
@@ -17,12 +14,10 @@
from mdio.api.io_utils import open_zarr_array_dask
from mdio.api.io_utils import process_url
from mdio.core import Grid
from mdio.core.exceptions import MDIONotFoundError
from mdio.exceptions import ShapeError


simplefilter("always", DeprecationWarning)


class MDIOAccessor:
"""Accessor class for MDIO files.
@@ -89,6 +84,9 @@ class MDIOAccessor:
False, which turns off disk caching. See `simplecache` from
`fsspec` documentation for more details.
Raises:
MDIONotFoundError: If the MDIO file cannot be opened.
Notes:
The combination of the `Dask` backend and caching schemes is experimental.
This configuration may cause unexpected memory usage and duplicate data
@@ -198,21 +196,13 @@ def _connect(self):
mode=self.mode,
metadata_key="zmetadata",
)
except KeyError:
# Backwards compatibility pre v0.1.0
# This will be irrelevant when we go zarr v3.
self.store.key_separator = "."
self.root = zarr.open_consolidated(
store=self.store,
mode=self.mode,
)
except KeyError as e:
msg = (
"Encountered an older MDIO file (pre MDIO). The "
"support for these files will be removed at a future release. "
"Please consider re-ingesting your files with the latest "
"version of MDIO to avoid problems in the future."
f"MDIO file not found or corrupt at {self.store.path}. "
"Please check the URL or ensure it is not a deprecated "
"version of MDIO file."
)
warn(msg, DeprecationWarning, stacklevel=2)
raise MDIONotFoundError(msg) from e

def _deserialize_grid(self):
"""Deserialize grid from Zarr metadata."""
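With this change, opening a missing or corrupt store raises `MDIONotFoundError` instead of emitting a `DeprecationWarning`. A minimal sketch of how calling code might handle it (the target path below is hypothetical):

```python
from mdio import MDIOReader
from mdio.core.exceptions import MDIONotFoundError

try:
    # Hypothetical target; a missing or unreadable store now raises instead of warning.
    reader = MDIOReader("s3://my-bucket/seismic.mdio")
except MDIONotFoundError as err:
    print(f"Could not open MDIO file: {err}")
```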
11 changes: 6 additions & 5 deletions src/mdio/api/io_utils.py
@@ -3,11 +3,11 @@

from __future__ import annotations

from collections.abc import MutableMapping
from typing import Any

import dask.array as da
import zarr
from zarr.storage import FSStore


def process_url(
@@ -16,7 +16,7 @@ def process_url(
storage_options: dict[str, Any],
memory_cache_size: int,
disk_cache: bool,
) -> MutableMapping:
) -> FSStore:
"""Check read/write access to FSStore target and return FSStore with double caching.
It uses a file cache (simplecache protocol from FSSpec) and an in-memory
@@ -51,15 +51,17 @@
elif "az://" in url or "abfs://" in url:
storage_options = {"abfs": storage_options}

# Strip whitespaces and slashes from end of string
url = url.rstrip("/ ")

# Flag for checking write access
check = True if mode == "w" else False

# TODO: Turning off write checking now because zarr has a bug.
# Get rid of this once bug is fixed.
check = False

# Let's open the FSStore and append LRU cache
store = zarr.storage.FSStore(
store = FSStore(
url=url,
check=check,
create=check,
@@ -68,7 +70,6 @@
**storage_options,
)

# Attach LRU Cache to store if requested.
if memory_cache_size != 0:
store = zarr.storage.LRUStoreCache(store=store, max_size=memory_cache_size)

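A rough usage sketch of `process_url`; the leading `url` and `mode` parameters are assumed from the call site in `accessor.py` (only the trailing parameters and the new `FSStore` return type are visible in this hunk):

```python
from mdio.api.io_utils import process_url

# Assumed signature: process_url(url, mode, storage_options, memory_cache_size, disk_cache)
store = process_url(
    url="s3://my-bucket/seismic.mdio/",  # trailing slashes/spaces are now stripped
    mode="r",
    storage_options={"anon": True},
    memory_cache_size=0,  # 0 skips the LRUStoreCache wrapper
    disk_cache=False,     # no fsspec simplecache layer
)
```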
24 changes: 12 additions & 12 deletions src/mdio/commands/segy.py
@@ -229,32 +229,32 @@ def segy_import(
\b
3D Seismic Post-Stack:
Chunks: 128 inlines x 128 crosslines x 128 samples
--header_locations 189,193
--header_names inline,crossline
--header-locations 189,193
--header-names inline,crossline
\b
3D Seismic Imaged Pre-Stack Gathers:
Chunks: 16 inlines x 16 crosslines x 16 offsets x 512 samples
--header_locations 189,193,37
--header_names inline,crossline,offset
--chunk_size 16,16,16,512
--header-locations 189,193,37
--header-names inline,crossline,offset
--chunk-size 16,16,16,512
\b
2D Seismic Shot Data (Byte Locations Vary):
Chunks: 16 shots x 256 channels x 512 samples
--header_locations 9,13
--header_names shot,chan
--chunk_size 16,256,512
--header-locations 9,13
--header-names shot,chan
--chunk-size 16,256,512
\b
3D Seismic Shot Data (Byte Locations Vary):
Let's assume the streamer number is at byte 213 as a 2-byte field
Chunks: 8 shots x 2 cables x 256 channels x 512 samples
--header_locations 9,213,13
--header_names shot,cable,chan
--header_lengths 4,2,4,4
--chunk_size 8,2,256,512
--header-locations 9,213,13
--header-names shot,cable,chan
--header-lengths 4,2,4,4
--chunk-size 8,2,256,512
"""
mdio.segy_to_mdio(
segy_path=input_segy_path,
16 changes: 16 additions & 0 deletions src/mdio/core/exceptions.py
@@ -0,0 +1,16 @@
"""Core exceptions for MDIO."""


from mdio.exceptions import MDIOError


class MDIOAlreadyExistsError(MDIOError):
"""Raised when MDIO file already exists."""

pass


class MDIONotFoundError(MDIOError):
"""Raised when MDIO file doesn't exist."""

pass
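Both new classes derive from `MDIOError`, so callers can branch on the specific failure or fall back to the shared base. An illustrative helper (not part of this PR):

```python
from mdio.core.exceptions import MDIOAlreadyExistsError
from mdio.core.exceptions import MDIONotFoundError
from mdio.exceptions import MDIOError


def describe_failure(exc: MDIOError) -> str:
    """Map an MDIO failure to a short, user-facing hint."""
    if isinstance(exc, MDIOAlreadyExistsError):
        return "Target already holds MDIO data; pass overwrite=True if intended."
    if isinstance(exc, MDIONotFoundError):
        return "Target is missing or unreadable; check the URL."
    return "Unexpected MDIO error."
```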
5 changes: 4 additions & 1 deletion src/mdio/segy/exceptions.py
@@ -1,7 +1,10 @@
"""Custom exceptions for SEG-Y."""


class InvalidSEGYFileError(IOError):
from mdio.exceptions import MDIOError


class InvalidSEGYFileError(MDIOError):
"""Raised when there is an IOError from segyio."""

pass
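Since `InvalidSEGYFileError` now subclasses `MDIOError` instead of `IOError`, a broad `except MDIOError` also covers SEG-Y validation failures:

```python
from mdio.exceptions import MDIOError
from mdio.segy.exceptions import InvalidSEGYFileError

# Sanity check of the new hierarchy.
assert issubclass(InvalidSEGYFileError, MDIOError)
```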
21 changes: 17 additions & 4 deletions src/mdio/segy/helpers_segy.py
@@ -1,14 +1,17 @@
"""Helper functions for tinkering with SEG-Y related Zarr."""


from collections.abc import MutableMapping
from math import prod

from zarr import Group
from zarr import open_group
from zarr.errors import ContainsGroupError
from zarr.storage import FSStore

from mdio.core.exceptions import MDIOAlreadyExistsError

def create_zarr_hierarchy(store: MutableMapping, overwrite: bool) -> Group:

def create_zarr_hierarchy(store: FSStore, overwrite: bool) -> Group:
"""Create `zarr` hierarchy for SEG-Y files.
Args:
@@ -17,11 +20,21 @@ def create_zarr_hierarchy(store: MutableMapping, overwrite: bool) -> Group:
Returns:
Zarr Group instance for root of the file.
Raises:
MDIOAlreadyExistsError: If a file with data already exists.
"""
root_group = open_group(store=store)

root_group.create_group(name="data", overwrite=overwrite)
root_group.create_group(name="metadata", overwrite=overwrite)
try:
root_group.create_group(name="data", overwrite=overwrite)
root_group.create_group(name="metadata", overwrite=overwrite)
except ContainsGroupError as e:
msg = (
f"An MDIO file with data already exists at '{store.path}'. "
"If this is intentional, please specify 'overwrite=True'."
)
raise MDIOAlreadyExistsError(msg) from e

return root_group

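A minimal sketch of the new overwrite behavior (the local target path is hypothetical):

```python
from zarr.storage import FSStore

from mdio.core.exceptions import MDIOAlreadyExistsError
from mdio.segy.helpers_segy import create_zarr_hierarchy

store = FSStore("output.mdio")  # hypothetical local target

try:
    root = create_zarr_hierarchy(store, overwrite=False)
except MDIOAlreadyExistsError:
    # Re-running against a populated target now fails loudly;
    # opt in explicitly only if overwriting is really intended.
    root = create_zarr_hierarchy(store, overwrite=True)
```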
24 changes: 18 additions & 6 deletions tests/unit/conftest.py
@@ -1,12 +1,17 @@
"""Extra configurations for unit tests."""


from __future__ import annotations

from datetime import datetime
from importlib import metadata

import numpy as np
import pytest
import zarr
from numpy.typing import NDArray
from zarr import Group
from zarr import consolidate_metadata
from zarr.storage import FSStore

from mdio import MDIOReader
from mdio.core import Dimension
@@ -28,7 +33,7 @@
def mock_store(tmp_path_factory):
"""Make a mocked MDIO store for writing."""
tmp_dir = tmp_path_factory.mktemp("mdio")
return zarr.storage.FSStore(tmp_dir.name, dimension_separator="/")
return FSStore(tmp_dir.name, dimension_separator="/")


@pytest.fixture
@@ -68,7 +73,14 @@ def mock_data(mock_coords):


@pytest.fixture
def mock_mdio(mock_store, mock_dimensions, mock_coords, mock_data, mock_text, mock_bin):
def mock_mdio(
mock_store: FSStore,
mock_dimensions: list[Dimension],
mock_coords: tuple[NDArray],
mock_data: NDArray,
mock_text: list[str],
mock_bin: dict[str, int],
):
"""This mocks most of mdio.converters.segy in memory."""
zarr_root = create_zarr_hierarchy(
store=mock_store,
@@ -121,12 +133,12 @@ def mock_mdio(mock_store, mock_dimensions, mock_coords, mock_data, mock_text, mo
chunks=data_arr.chunks[:-1], # Same spatial chunks as data
)

zarr.consolidate_metadata(mock_store)
consolidate_metadata(mock_store)

return zarr_root


@pytest.fixture
def mock_reader(mock_mdio, mock_store):
def mock_reader(mock_mdio: Group) -> MDIOReader:
"""Reader that points to the mocked data to be used later."""
return MDIOReader(mock_store)
return MDIOReader(mock_mdio.store.path)
20 changes: 20 additions & 0 deletions tests/unit/test_accessor.py
@@ -5,6 +5,11 @@
import numpy.testing as npt
import pytest

from mdio import MDIOReader
from mdio.core.exceptions import MDIOAlreadyExistsError
from mdio.core.exceptions import MDIONotFoundError
from mdio.segy.helpers_segy import create_zarr_hierarchy


class TestReader:
"""Tests for reader units."""
@@ -77,3 +82,18 @@ def test_coord_slicing(

for act_idx, exp_idx in zip(z_indices, z_index):
npt.assert_array_equal(mock_reader[..., act_idx], mock_data[..., exp_idx])


class TestExceptions:
"""Test custom exceptions and if they're raised properly."""

def test_mdio_not_found(self) -> None:
"""MDIO file doesn't exist or is corrupt."""
with pytest.raises(MDIONotFoundError):
MDIOReader("prefix/file_that_doesnt_exist.mdio")

def test_mdio_exists(self, mock_reader: MDIOReader) -> None:
"""MDIO file already exists at the target store."""
mock_store = mock_reader.store
with pytest.raises(MDIOAlreadyExistsError):
create_zarr_hierarchy(mock_store, overwrite=False)
