Skip to content

Commit

Permalink
Move absolute path finder from open_mfdataset to own function (#7968)
Browse files Browse the repository at this point in the history
* Move absolute path finder to own function

* Update common.py

* Workaround for strange \ behavior

* More workarounds

* Update common.py

* Update common.py

* Update common.py

* Update xarray/backends/common.py

Co-authored-by: Michael Niklas  <mick.niklas@gmail.com>

* Update common.py

---------

Co-authored-by: Michael Niklas  <mick.niklas@gmail.com>
  • Loading branch information
Illviljan and headtr1ck authored Jul 10, 2023
1 parent 17c9e8f commit 554285b
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 33 deletions.
40 changes: 7 additions & 33 deletions xarray/backends/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import os
from collections.abc import Hashable, Iterable, Mapping, MutableMapping, Sequence
from functools import partial
from glob import glob
from io import BytesIO
from numbers import Number
from typing import (
Expand All @@ -21,7 +20,12 @@

from xarray import backends, conventions
from xarray.backends import plugins
from xarray.backends.common import AbstractDataStore, ArrayWriter, _normalize_path
from xarray.backends.common import (
AbstractDataStore,
ArrayWriter,
_find_absolute_paths,
_normalize_path,
)
from xarray.backends.locks import _get_scheduler
from xarray.core import indexing
from xarray.core.combine import (
Expand Down Expand Up @@ -967,37 +971,7 @@ def open_mfdataset(
.. [1] https://docs.xarray.dev/en/stable/dask.html
.. [2] https://docs.xarray.dev/en/stable/dask.html#chunking-and-performance
"""
if isinstance(paths, str):
if is_remote_uri(paths) and engine == "zarr":
try:
from fsspec.core import get_fs_token_paths
except ImportError as e:
raise ImportError(
"The use of remote URLs for opening zarr requires the package fsspec"
) from e

fs, _, _ = get_fs_token_paths(
paths,
mode="rb",
storage_options=kwargs.get("backend_kwargs", {}).get(
"storage_options", {}
),
expand=False,
)
tmp_paths = fs.glob(fs._strip_protocol(paths)) # finds directories
paths = [fs.get_mapper(path) for path in tmp_paths]
elif is_remote_uri(paths):
raise ValueError(
"cannot do wild-card matching for paths that are remote URLs "
f"unless engine='zarr' is specified. Got paths: {paths}. "
"Instead, supply paths as an explicit list of strings."
)
else:
paths = sorted(glob(_normalize_path(paths)))
elif isinstance(paths, os.PathLike):
paths = [os.fspath(paths)]
else:
paths = [os.fspath(p) if isinstance(p, os.PathLike) else p for p in paths]
paths = _find_absolute_paths(paths, engine=engine, **kwargs)

if not paths:
raise OSError("no files to open")
Expand Down
78 changes: 78 additions & 0 deletions xarray/backends/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import time
import traceback
from collections.abc import Iterable
from glob import glob
from typing import TYPE_CHECKING, Any, ClassVar

import numpy as np
Expand All @@ -19,6 +20,7 @@
from io import BufferedIOBase

from xarray.core.dataset import Dataset
from xarray.core.types import NestedSequence

# Create a logger object, but don't add any handlers. Leave that to user code.
logger = logging.getLogger(__name__)
Expand All @@ -28,6 +30,24 @@


def _normalize_path(path):
"""
Normalize pathlikes to string.
Parameters
----------
path :
Path to file.
Examples
--------
>>> from pathlib import Path
>>> directory = Path(xr.backends.common.__file__).parent
>>> paths_path = Path(directory).joinpath("comm*n.py")
>>> paths_str = xr.backends.common._normalize_path(paths_path)
>>> print([type(p) for p in (paths_str,)])
[<class 'str'>]
"""
if isinstance(path, os.PathLike):
path = os.fspath(path)

Expand All @@ -37,6 +57,64 @@ def _normalize_path(path):
return path


def _fspath_nested(paths):
    """Return a new list with every path-like entry of ``paths`` converted to ``str``.

    Inner ``list`` objects are processed recursively, so path-like objects are
    converted at any nesting depth. Entries that are neither path-like nor a
    ``list`` (plain strings, tuples, other objects) are passed through
    untouched.
    """
    converted = []
    for entry in paths:
        if isinstance(entry, os.PathLike):
            converted.append(os.fspath(entry))
        elif isinstance(entry, list):
            converted.append(_fspath_nested(entry))
        else:
            converted.append(entry)
    return converted


def _find_absolute_paths(
    paths: str | os.PathLike | NestedSequence[str | os.PathLike], **kwargs
) -> list[str]:
    """
    Find absolute paths from the pattern.

    Parameters
    ----------
    paths :
        Path(s) to file(s). A single string may include wildcards like ``*``
        and is expanded by globbing. A single path-like object, or a (possibly
        nested) list of paths, is treated as an explicit listing: it is not
        globbed, and only its path-like entries are converted to strings.
    **kwargs :
        Extra kwargs. Mainly for fsspec. Only ``engine`` and
        ``backend_kwargs["storage_options"]`` are read, and only when ``paths``
        is a remote URL string.

    Returns
    -------
    list
        For a local string pattern, the sorted glob matches. For a remote URL
        with ``engine="zarr"``, one fsspec mapper per match. For a single
        path-like object, a one-element list holding its string form. For a
        list, a new list with the same nesting in which every path-like
        object has been replaced by its string form.

    Raises
    ------
    ImportError
        If a remote zarr URL is given but fsspec cannot be imported.
    ValueError
        If a remote URL string is given and ``engine`` is not ``"zarr"``.

    Examples
    --------
    >>> from pathlib import Path
    >>> directory = Path(xr.backends.common.__file__).parent
    >>> paths = str(Path(directory).joinpath("comm*n.py"))  # Find common with wildcard
    >>> paths = xr.backends.common._find_absolute_paths(paths)
    >>> [Path(p).name for p in paths]
    ['common.py']
    """
    if isinstance(paths, str):
        if not is_remote_uri(paths):
            # Local pattern: glob order is arbitrary, so sort for determinism.
            return sorted(glob(_normalize_path(paths)))

        if kwargs.get("engine") != "zarr":
            raise ValueError(
                "cannot do wild-card matching for paths that are remote URLs "
                f"unless engine='zarr' is specified. Got paths: {paths}. "
                "Instead, supply paths as an explicit list of strings."
            )

        try:
            from fsspec.core import get_fs_token_paths
        except ImportError as e:
            raise ImportError(
                "The use of remote URLs for opening zarr requires the package fsspec"
            ) from e

        # ``backend_kwargs`` may be passed explicitly as None; treat that the
        # same as "not given" instead of failing on ``None.get``.
        backend_kwargs = kwargs.get("backend_kwargs") or {}
        fs, _, _ = get_fs_token_paths(
            paths,
            mode="rb",
            storage_options=backend_kwargs.get("storage_options", {}),
            expand=False,
        )
        tmp_paths = fs.glob(fs._strip_protocol(paths))  # finds directories
        return [fs.get_mapper(path) for path in tmp_paths]

    if isinstance(paths, os.PathLike):
        return [os.fspath(paths)]

    # Explicit (possibly nested) listing: convert path-like objects at every
    # nesting level, not only the outermost one.
    return _fspath_nested(paths)


def _encode_variable_name(name):
if name is None:
name = NONE_VAR_NAME
Expand Down

0 comments on commit 554285b

Please sign in to comment.