diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index dff0f69f..82a1460a 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -21,7 +21,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-20.04, windows-latest, macos-latest] - pyv: ['3.8', '3.9', '3.10', '3.11'] + pyv: ['3.8', '3.9', '3.10', '3.11', '3.12'] fsspec: [''] include: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a2b347f5..201e8386 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,7 @@ default_language_version: exclude: ^upath/tests/pathlib/test_pathlib.*\.py|^upath/tests/pathlib/_test_support\.py repos: - repo: https://github.com/psf/black - rev: 23.3.0 + rev: 23.9.1 hooks: - id: black - repo: https://github.com/pre-commit/pre-commit-hooks @@ -30,7 +30,7 @@ repos: - id: codespell additional_dependencies: ["tomli"] - repo: https://github.com/asottile/pyupgrade - rev: v3.6.0 + rev: v3.13.0 hooks: - id: pyupgrade args: [--py38-plus] @@ -39,7 +39,7 @@ repos: hooks: - id: isort - repo: https://github.com/pycqa/flake8 - rev: 6.0.0 + rev: 6.1.0 hooks: - id: flake8 additional_dependencies: diff --git a/noxfile.py b/noxfile.py index 71d86e26..65f41eb2 100644 --- a/noxfile.py +++ b/noxfile.py @@ -9,8 +9,10 @@ locations = ("upath",) -@nox.session(python=["3.8", "3.9", "3.10", "3.11", "pypy3.8", "pypy3.9"]) +@nox.session(python=["3.8", "3.9", "3.10", "3.11", "3.12", "pypy3.8", "pypy3.9"]) def tests(session: nox.Session) -> None: + # workaround in case no aiohttp binary wheels are available + session.env["AIOHTTP_NO_EXTENSIONS"] = "1" session.install(".[dev]") session.run( "pytest", @@ -44,7 +46,7 @@ def lint(session: nox.Session) -> None: args = *(session.posargs or ("--show-diff-on-failure",)), "--all-files" session.run("pre-commit", "run", *args) - session.run("python", "-m", "mypy") + # session.run("python", "-m", "mypy") # session.run("python", "-m", "pylint", *locations) @@ -86,8 +88,8 @@ def black(session): @nox.session def type_checking(session): - print("please run `nox -s lint` instead") - raise SystemExit(1) + session.install("-e", ".[tests]") + session.run("python", "-m", "mypy") @nox.session() diff --git a/setup.cfg b/setup.cfg index 161eee85..8a6dd66a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -27,12 +27,12 @@ install_requires= [options.extras_require] tests = - pytest==7.3.2 - pytest-sugar==0.9.6 + pytest==8.0.0 + pytest-sugar==0.9.7 pytest-cov==4.1.0 - pytest-mock==3.11.1 + pytest-mock==3.12.0 pylint==2.17.4 - mypy==1.3.0 + mypy==1.8.0 packaging dev = %(tests)s @@ -41,12 +41,12 @@ dev = requests gcsfs s3fs - moto[s3,server] + moto[s3,server]<5 webdav4[fsspec] wsgidav cheroot - hadoop-test-cluster - pyarrow + # hadoop-test-cluster + # pyarrow pydantic pydantic-settings diff --git a/upath/__init__.py b/upath/__init__.py index a9bccf47..f5ec5279 100644 --- a/upath/__init__.py +++ b/upath/__init__.py @@ -1,9 +1,19 @@ """Pathlib API extended to use fsspec backends.""" -from upath.core import UPath +import sys try: from upath._version import __version__ except ImportError: __version__ = "not-installed" +if sys.version_info >= (3, 12): + import upath.core312plus as core + + sys.modules["upath.core"] = core + +else: + import upath.core as core + +UPath = core.UPath + __all__ = ["UPath"] diff --git a/upath/core312plus.py b/upath/core312plus.py new file mode 100644 index 00000000..dae68419 --- /dev/null +++ b/upath/core312plus.py @@ -0,0 +1,668 @@ +from __future__ import annotations + +import os +import posixpath +import re +import sys +import warnings +from copy import copy +from pathlib import Path +from pathlib import PurePath +from types import MappingProxyType +from typing import TYPE_CHECKING +from typing import Any +from typing import Mapping +from typing import TypeAlias +from typing import cast +from urllib.parse import urlsplit + +if sys.version_info >= (3, 11): + from typing import Self +else: + Self = Any + +from fsspec import AbstractFileSystem +from fsspec import filesystem +from fsspec import get_filesystem_class +from fsspec.core import strip_protocol as fsspec_strip_protocol + +from upath.registry import get_upath_class + +PathOrStr: TypeAlias = "str | PurePath | os.PathLike" + + +class _FSSpecAccessor: + """this is a compatibility shim and will be removed""" + + +class FSSpecFlavour: + """fsspec flavour for universal_pathlib + + **INTERNAL AND VERY MUCH EXPERIMENTAL** + + Implements the fsspec compatible low-level lexical operations on + PurePathBase-like objects. + + Note: + In case you find yourself in need of subclassing FSSpecFlavour, + please open an issue in the universal_pathlib issue tracker: + https://github.com/fsspec/universal_pathlib/issues + Ideally we can find a way to make your use-case work by adding + more functionality to this class. + + """ + + def __init__( + self, + *, + # URI behavior + join_prepends_protocol: bool = False, + join_like_urljoin: bool = False, + supports_empty_parts: bool = False, + supports_netloc: bool = False, + supports_query_parameters: bool = False, + supports_fragments: bool = False, + posixpath_only: bool = True, + # configurable separators + sep: str = "/", + altsep: str | None = None, + ): + self._owner = None + # separators + self.sep = sep + self.altsep = altsep + # configuration + self.join_prepends_protocol = join_prepends_protocol + self.join_like_urljoin = join_like_urljoin + self.supports_empty_parts = supports_empty_parts + self.supports_netloc = supports_netloc + self.supports_query_parameters = supports_query_parameters + self.supports_fragments = supports_fragments + self.posixpath_only = posixpath_only + + def __set_name__(self, owner, name): + # helper to provide a more informative repr + self._owner = owner.__name__ + + def _asdict(self) -> dict[str, Any]: + """return a dict representation of the flavour's settings""" + dct = vars(self).copy() + dct.pop("_owner") + return dct + + def __repr__(self): + return f"<{__name__}.{type(self).__name__} of {self._owner}>" + + def join(self, __path: PathOrStr, *paths: PathOrStr) -> str: + """Join two or more path components, inserting '/' as needed.""" + path = strip_upath_protocol(__path) + paths = map(strip_upath_protocol, paths) + + if self.join_like_urljoin: + path = path.removesuffix("/") + sep = self.sep + for b in paths: + if b.startswith(sep): + path = b + elif not path: + path += b + else: + path += sep + b + joined = path + elif self.posixpath_only: + joined = posixpath.join(path, *paths) + else: + joined = os.path.join(path, *paths) + + if self.join_prepends_protocol and (protocol := _match_protocol(__path)): + joined = f"{protocol}://{joined}" + + return joined + + def splitroot(self, __path: PathOrStr) -> tuple[str, str, str]: + """Split a path in the drive, the root and the rest.""" + if self.supports_fragments or self.supports_query_parameters: + url = urlsplit(__path) + drive = url._replace(path="", query="", fragment="").geturl() + path = url._replace(scheme="", netloc="").geturl() + root = "/" if path.startswith("/") else "" + return drive, root, path.removeprefix("/") + + path = strip_upath_protocol(__path) + if self.supports_netloc: + protocol = _match_protocol(__path) + if protocol: + drive, root, tail = path.partition("/") + return drive, root or "/", tail + else: + return "", "", path + elif self.posixpath_only: + return posixpath.splitroot(path) + else: + drv, root, path = os.path.splitroot(path) + if os.name == "nt" and not drv: + drv = "C:" + return drv, root, path + + def splitdrive(self, __path: PathOrStr) -> tuple[str, str]: + """Split a path into drive and path.""" + if self.supports_fragments or self.supports_query_parameters: + path = strip_upath_protocol(__path) + url = urlsplit(path) + path = url._replace(scheme="", netloc="").geturl() + drive = url._replace(path="", query="", fragment="").geturl() + return drive, path + + path = strip_upath_protocol(__path) + if self.supports_netloc: + protocol = _match_protocol(__path) + if protocol: + drive, root, tail = path.partition("/") + return drive, f"{root}{tail}" + else: + return "", path + elif self.posixpath_only: + return posixpath.splitdrive(path) + else: + drv, path = os.path.splitdrive(path) + if os.name == "nt" and not drv: + drv = "C:" + return drv, path + + def normcase(self, __path: PathOrStr) -> str: + """Normalize case of pathname. Has no effect under Posix""" + if self.posixpath_only: + return posixpath.normcase(__path) + else: + return os.path.normcase(__path) + + +_PROTOCOL_RE = re.compile( + r"^(?P[A-Za-z][A-Za-z0-9+]+):(?P//?)(?P.*)" +) + + +def strip_upath_protocol(pth: PathOrStr) -> str: + """strip protocol from path""" + if isinstance(pth, PurePath): + pth = str(pth) + elif not isinstance(pth, str): + pth = os.fspath(pth) + if m := _PROTOCOL_RE.match(pth): + protocol = m.group("protocol") + path = m.group("path") + if len(m.group("slashes")) == 1: + pth = f"{protocol}:///{path}" + return fsspec_strip_protocol(pth) + else: + return pth + + +def _match_protocol(pth: str) -> str: + if m := _PROTOCOL_RE.match(pth): + return m.group("protocol") + return "" + + +def get_upath_protocol( + pth: str | PurePath | os.PathLike, + *, + protocol: str | None = None, + storage_options: dict[str, Any] | None = None, +) -> str: + """return the filesystem spec protocol""" + if isinstance(pth, str): + pth_protocol = _match_protocol(pth) + elif isinstance(pth, UPath): + pth_protocol = pth.protocol + elif isinstance(pth, PurePath): + pth_protocol = "" + else: + pth_protocol = _match_protocol(os.fspath(pth)) + if storage_options and not protocol and not pth_protocol: + protocol = "file" + if protocol and pth_protocol and not pth_protocol.startswith(protocol): + raise ValueError( + f"requested protocol {protocol!r} incompatible with {pth_protocol!r}" + ) + return protocol or pth_protocol or "" + + +def _make_instance(cls, args, kwargs): + """helper for pickling UPath instances""" + return cls(*args, **kwargs) + + +class UPath(Path): + __slots__ = ( + "_protocol", + "_storage_options", + "_fs_cached", + ) + if TYPE_CHECKING: + _protocol: str + _storage_options: dict[str, Any] + _fs_cached: AbstractFileSystem + + _flavour = FSSpecFlavour() + + def __new__( + cls, *args, protocol: str | None = None, **storage_options: Any + ) -> UPath: + # fill empty arguments + if not args: + args = (".",) + + # create a copy if UPath class + part0, *parts = args + if not parts and not storage_options and isinstance(part0, cls): + return copy(part0) + + # deprecate 'scheme' + if "scheme" in storage_options: + warnings.warn( + "use 'protocol' kwarg instead of 'scheme'", + DeprecationWarning, + stacklevel=2, + ) + protocol = storage_options.pop("scheme") + + # determine which UPath subclass to dispatch to + pth_protocol = get_upath_protocol( + part0, protocol=protocol, storage_options=storage_options + ) + upath_cls = get_upath_class(protocol=pth_protocol) + if upath_cls is None: + raise ValueError(f"Unsupported filesystem: {pth_protocol!r}") + + # create a new instance + if cls is UPath: + # we called UPath() directly, and want an instance based on the + # provided or detected protocol (i.e. upath_cls) + obj: UPath = cast("UPath", object.__new__(upath_cls)) + obj._protocol = pth_protocol + + elif issubclass(cls, upath_cls): + # we called a sub- or sub-sub-class of UPath, i.e. S3Path() and the + # corresponding upath_cls based on protocol is equal-to or a + # parent-of the cls. + obj = cast("UPath", object.__new__(cls)) # type: ignore[unreachable] + obj._protocol = pth_protocol + + elif issubclass(cls, UPath): + # we called a subclass of UPath directly, i.e. S3Path() but the + # detected protocol would return a non-related UPath subclass, i.e. + # S3Path("file:///abc"). This behavior is going to raise an error + # in future versions + msg_protocol = repr(pth_protocol) + if not pth_protocol: + msg_protocol += " (empty string)" + msg = ( + f"{cls.__name__!s}(...) detected protocol {msg_protocol!s} and" + f" returns a {upath_cls.__name__} instance that isn't a direct" + f" subclass of {cls.__name__}. This will raise an exception in" + " future universal_pathlib versions. To prevent the issue, use" + " UPath(...) to create instances of unrelated protocols or you" + f" can instead derive your subclass {cls.__name__!s}(...) from" + f" {upath_cls.__name__} or alternatively override behavior via" + f" registering the {cls.__name__} implementation with protocol" + f" {msg_protocol!s} replacing the default implementation." + ) + warnings.warn(msg, DeprecationWarning, stacklevel=2) + + obj = cast("UPath", object.__new__(upath_cls)) + obj._protocol = pth_protocol + + upath_cls.__init__( + obj, *args, protocol=pth_protocol, **storage_options + ) # type: ignore + + else: + raise RuntimeError("UPath.__new__ expected cls to be subclass of UPath") + + return obj + + def __init__( + self, *args, protocol: str | None = None, **storage_options: Any + ) -> None: + # retrieve storage_options + if args: + args0 = args[0] + if isinstance(args0, UPath): + self._storage_options = {**args0.storage_options, **storage_options} + else: + fs_cls: type[AbstractFileSystem] = get_filesystem_class( + protocol or self._protocol + ) + pth_storage_options = fs_cls._get_kwargs_from_urls(str(args0)) + self._storage_options = {**pth_storage_options, **storage_options} + else: + self._storage_options = storage_options.copy() + + # check that UPath subclasses in args are compatible + # --> ensures items in _raw_paths are compatible + for arg in args: + if not isinstance(arg, UPath): + continue + # protocols: only identical (or empty "") protocols can combine + if arg.protocol and arg.protocol != self._protocol: + raise TypeError("can't combine different UPath protocols as parts") + # storage_options: args may not define other storage_options + if any( + self._storage_options.get(key) != value + for key, value in arg.storage_options.items() + ): + # raise ValueError( + # "can't combine different UPath storage_options as parts" + # ) todo: revisit and define behaviour + pass + + # fill ._raw_paths + super().__init__(*args) + + # === upath.UPath only ============================================ + + @property + def protocol(self) -> str: + return self._protocol + + @property + def storage_options(self) -> Mapping[str, Any]: + return MappingProxyType(self._storage_options) + + @property + def fs(self) -> AbstractFileSystem: + try: + return self._fs_cached + except AttributeError: + fs = self._fs_cached = filesystem( + protocol=self.protocol, **self.storage_options + ) + return fs + + @property + def path(self) -> str: + return super().__str__() + + @property + def _kwargs(self): + warnings.warn( + "use UPath.storage_options instead of UPath._kwargs", + DeprecationWarning, + stacklevel=2, + ) + return self.storage_options + + @property + def _url(self): # todo: deprecate + return urlsplit(self.as_posix()) + + # === pathlib.PurePath ============================================ + + def __reduce__(self): + args = tuple(self._raw_paths) + kwargs = { + "protocol": self._protocol, + **self._storage_options, + } + return _make_instance, (type(self), args, kwargs) + + def with_segments(self, *pathsegments): + return type(self)( + *pathsegments, + protocol=self._protocol, + **self._storage_options, + ) + + @classmethod + def _parse_path(cls, path): + if cls._flavour.supports_empty_parts: + drv, root, rel = cls._flavour.splitroot(path) + if not root: + parsed = [] + else: + parsed = list(map(sys.intern, rel.split(cls._flavour.sep))) + if parsed[-1] == ".": + parsed[-1] = "" + parsed = [x for x in parsed if x != "."] + return drv, root, parsed + return super()._parse_path(path) + + def __str__(self): + if self._protocol: + return f"{self._protocol}://{self.path}" + else: + return self.path + + def __fspath__(self): + msg = ( + "in a future version of UPath this will be set to None" + " unless the filesystem is local (or caches locally)" + ) + warnings.warn(msg, PendingDeprecationWarning, stacklevel=2) + return str(self) + + def __bytes__(self): + msg = ( + "in a future version of UPath this will be set to None" + " unless the filesystem is local (or caches locally)" + ) + warnings.warn(msg, PendingDeprecationWarning, stacklevel=2) + return os.fsencode(self) + + def as_uri(self): + return str(self) + + def is_reserved(self): + return False + + def relative_to(self, other, /, *_deprecated, walk_up=False): + if isinstance(other, UPath) and self.storage_options != other.storage_options: + raise ValueError( + "paths have different storage_options:" + f" {self.storage_options!r} != {other.storage_options!r}" + ) + return super().relative_to(other, *_deprecated, walk_up=walk_up) + + def is_relative_to(self, other, /, *_deprecated): + if isinstance(other, UPath) and self.storage_options != other.storage_options: + return False + return super().is_relative_to(other, *_deprecated) + + # === pathlib.Path ================================================ + + def stat(self, *, follow_symlinks=True): + return self.fs.stat(self.path) + + def lstat(self): + # return self.stat(follow_symlinks=False) + raise NotImplementedError + + def exists(self, *, follow_symlinks=True): + return self.fs.exists(self.path) + + def is_dir(self): + return self.fs.isdir(self.path) + + def is_file(self): + return self.fs.isfile(self.path) + + def is_mount(self): + return False + + def is_symlink(self): + try: + info = self.fs.info(self.path) + if "islink" in info: + return bool(info["islink"]) + except FileNotFoundError: + return False + return False + + def is_junction(self): + return False + + def is_block_device(self): + return False + + def is_char_device(self): + return False + + def is_fifo(self): + return False + + def is_socket(self): + return False + + def samefile(self, other_path): + raise NotImplementedError + + def open(self, mode="r", buffering=-1, encoding=None, errors=None, newline=None): + return self.fs.open(self.path, mode) # fixme + + def iterdir(self): + if self._flavour.supports_empty_parts and self.parts[-1:] == ("",): + base = self.with_segments(self.anchor, *self._tail[:-1]) + else: + base = self + for name in self.fs.listdir(self.path): + # fsspec returns dictionaries + if isinstance(name, dict): + name = name.get("name") + if name in {".", ".."}: + # Yielding a path object for these makes little sense + continue + # only want the path name with iterdir + _, _, name = name.removesuffix("/").rpartition(self._flavour.sep) + yield base._make_child_relpath(name) + + def _scandir(self): + raise NotImplementedError # todo + + def _make_child_relpath(self, name): + path = super()._make_child_relpath(name) + del path._str # fix _str = str(self) assignment + return path + + def glob(self, pattern: str, *, case_sensitive=None): + path_pattern = self.joinpath(pattern).path + sep = self._flavour.sep + for name in self.fs.glob(path_pattern): + name = name.removeprefix(self.path).removeprefix(sep) + yield self.joinpath(name) + + def rglob(self, pattern: str, *, case_sensitive=None): + r_path_pattern = self.joinpath("**", pattern).path + sep = self._flavour.sep + for name in self.fs.glob(r_path_pattern): + name = name.removeprefix(self.path).removeprefix(sep) + yield self.joinpath(name) + + @classmethod + def cwd(cls): + if cls is UPath: + return get_upath_class("").cwd() + else: + raise NotImplementedError + + @classmethod + def home(cls): + if cls is UPath: + return get_upath_class("").home() + else: + raise NotImplementedError + + def absolute(self) -> Self: + return self + + def resolve(self, strict: bool = False) -> Self: + _parts = self.parts + + # Do not attempt to normalize path if no parts are dots + if ".." not in _parts and "." not in _parts: + return self + + resolved: list[str] = [] + resolvable_parts = _parts[1:] + last_idx = len(resolvable_parts) - 1 + for idx, part in enumerate(resolvable_parts): + if part == "..": + if resolved: + resolved.pop() + if self._flavour.supports_empty_parts and idx == last_idx: + resolved.append("") + elif part != ".": + resolved.append(part) + + return self.with_segments(*_parts[:1], *resolved) + + def owner(self): + raise NotImplementedError + + def group(self): + raise NotImplementedError + + def readlink(self): + raise NotImplementedError + + def touch(self, mode=0o666, exist_ok=True): + self.fs.touch(self.path, truncate=not exist_ok) + + def mkdir(self, mode=0o777, parents=False, exist_ok=False): + if parents: + if not exist_ok and self.exists(): + raise FileExistsError(str(self)) + self.fs.makedirs(self.path, exist_ok=exist_ok) + else: + try: + self.fs.mkdir( + self.path, + create_parents=False, + mode=mode, + ) + except FileExistsError: + if not exist_ok or not self.is_dir(): + raise FileExistsError(str(self)) + + def chmod(self, mode, *, follow_symlinks=True): + raise NotImplementedError + + def unlink(self, missing_ok=False): + if not self.exists(): + if not missing_ok: + raise FileNotFoundError(str(self)) + return + self.fs.rm(self.path, recursive=False) + + def rmdir(self, recursive: bool = True): # fixme: non-standard + if not self.is_dir(): + raise NotADirectoryError(str(self)) + if not recursive and next(self.iterdir()): + raise OSError(f"Not recursive and directory not empty: {self}") + self.fs.rm(self.path, recursive=recursive) + + def rename( + self, target, *, recursive=False, maxdepth=None, **kwargs + ): # fixme: non-standard + if not isinstance(target, UPath): + target = self.parent.joinpath(target).resolve() + self.fs.mv( + self.path, + target.path, + recursive=recursive, + maxdepth=maxdepth, + **kwargs, + ) + return target + + def replace(self, target): + raise NotImplementedError # todo + + def symlink_to(self, target, target_is_directory=False): + raise NotImplementedError + + def hardlink_to(self, target): + raise NotImplementedError + + def expanduser(self): + raise NotImplementedError diff --git a/upath/implementations/cloud.py b/upath/implementations/cloud.py index d03388f4..c9ad05e7 100644 --- a/upath/implementations/cloud.py +++ b/upath/implementations/cloud.py @@ -1,6 +1,9 @@ from __future__ import annotations import re +import sys +import warnings +from typing import Any import upath.core @@ -23,7 +26,6 @@ def mkdir(self, path, create_parents=True, **kwargs): return super().mkdir(path, create_parents=create_parents, **kwargs) -# project is not part of the path, but is part of the credentials class CloudPath(upath.core.UPath): _default_accessor = _CloudAccessor @@ -60,16 +62,28 @@ def _sub_path(self, name): def joinpath(self, *args): if self._url.netloc: return super().joinpath(*args) - # handles a bucket in the path - else: - path = args[0] - if isinstance(path, list): - args_list = list(*args) + + # if no bucket is defined for self + sep = self._flavour.sep + args_list = [] + for arg in args: + if isinstance(arg, list): + warnings.warn( + "lists as arguments to joinpath are deprecated", + DeprecationWarning, + stacklevel=2, + ) + args_list.extend(arg) else: - args_list = path.split(self._flavour.sep) - bucket = args_list.pop(0) - self._kwargs["bucket"] = bucket - return super().joinpath(*tuple(args_list)) + args_list.extend(arg.split(sep)) + bucket = args_list.pop(0) + return type(self)( + "/", + *args_list, + **self.storage_options, + bucket=bucket, + scheme=self.protocol, + ) @property def path(self) -> str: @@ -78,13 +92,56 @@ def path(self) -> str: return f"{self._url.netloc}{super()._path}" +if sys.version_info >= (3, 12): + from upath.core312plus import FSSpecFlavour + + class CloudPath(upath.core312plus.UPath): # noqa + __slots__ = () + _flavour = FSSpecFlavour( + join_prepends_protocol=True, + supports_netloc=True, + ) + + def __init__( + self, *args, protocol: str | None = None, **storage_options: Any + ) -> None: + if "bucket" in storage_options: + bucket = storage_options.pop("bucket") + args = [f"{self._protocol}://{bucket}/", *args] + super().__init__(*args, protocol=protocol, **storage_options) + + def mkdir( + self, mode: int = 0o777, parents: bool = False, exist_ok: bool = False + ) -> None: + if not parents and not exist_ok and self.exists(): + raise FileExistsError(self.path) + super().mkdir(mode=mode, parents=parents, exist_ok=exist_ok) + + def iterdir(self): + if self.is_file(): + raise NotADirectoryError(str(self)) + yield from super().iterdir() + + def relative_to(self, other, /, *_deprecated, walk_up=False): + # use the parent implementation for the ValueError logic + super().relative_to(other, *_deprecated, walk_up=False) + return self + + class GCSPath(CloudPath): - pass + __slots__ = () class S3Path(CloudPath): - pass + __slots__ = () class AzurePath(CloudPath): - pass + __slots__ = () + + def touch(self, mode=0o666, exist_ok=True): + if exist_ok and self.exists(): + with self.fs.open(self.path, mode="a"): + pass + else: + self.fs.touch(self.path, truncate=True) diff --git a/upath/implementations/hdfs.py b/upath/implementations/hdfs.py index 19e5a57e..50b1c75c 100644 --- a/upath/implementations/hdfs.py +++ b/upath/implementations/hdfs.py @@ -1,5 +1,7 @@ from __future__ import annotations +import sys + import upath.core @@ -35,3 +37,20 @@ def listdir(self, path, **kwargs): class HDFSPath(upath.core.UPath): _default_accessor = _HDFSAccessor + + +if sys.version_info >= (3, 12): + import upath.core312plus + + class HDFSPath(upath.core312plus.UPath): # noqa + __slots__ = () + + def mkdir(self, mode=0o777, parents=False, exist_ok=False): + if not exist_ok and self.exists(): + raise FileExistsError(str(self)) + super().mkdir(mode=mode, parents=parents, exist_ok=exist_ok) + + def iterdir(self): + if self.is_file(): + raise NotADirectoryError(str(self)) + yield from super().iterdir() diff --git a/upath/implementations/http.py b/upath/implementations/http.py index 6f215d93..9f647316 100644 --- a/upath/implementations/http.py +++ b/upath/implementations/http.py @@ -1,5 +1,7 @@ from __future__ import annotations +import sys +from itertools import chain from urllib.parse import urlunsplit from fsspec.asyn import sync @@ -86,9 +88,109 @@ def resolve( return resolved_path + @property + def drive(self): + return f"{self._url.scheme}://{self._url.netloc}" + + @property + def anchor(self) -> str: + return self.drive + self.root + + @property + def parts(self) -> tuple[str, ...]: + parts = super().parts + if not parts: + return () + p0, *partsN = parts + if p0 == "/": + p0 = self.anchor + if not partsN and self._url and self._url.path == "/": + partsN = [""] + return (p0, *partsN) + @property def path(self) -> str: # http filesystems use the full url as path if self._url is None: raise RuntimeError(str(self)) return urlunsplit(self._url) + + +if sys.version_info >= (3, 12): # noqa + from upath.core312plus import FSSpecFlavour + + class HTTPPath(upath.core312plus.UPath): # noqa + _flavour = FSSpecFlavour( + join_like_urljoin=True, + supports_empty_parts=True, + supports_netloc=True, + supports_query_parameters=True, + supports_fragments=True, + ) + + @property + def root(self) -> str: + return super().root or "/" + + def __str__(self): + return super(upath.core312plus.UPath, self).__str__() + + def is_file(self): + try: + next(super().iterdir()) + except (StopIteration, NotADirectoryError): + return True + except FileNotFoundError: + return False + else: + return False + + def is_dir(self): + try: + next(super().iterdir()) + except (StopIteration, NotADirectoryError): + return False + except FileNotFoundError: + return False + else: + return True + + def iterdir(self): + it = iter(super().iterdir()) + try: + item0 = next(it) + except (StopIteration, NotADirectoryError): + raise NotADirectoryError(str(self)) + except FileNotFoundError: + raise FileNotFoundError(str(self)) + else: + yield from chain([item0], it) + + def resolve( + self: HTTPPath, + strict: bool = False, + follow_redirects: bool = True, + ) -> HTTPPath: + """Normalize the path and resolve redirects.""" + # Normalise the path + resolved_path = super().resolve(strict=strict) + + if follow_redirects: + # Get the fsspec fs + fs = self.fs + url = str(self) + # Ensure we have a session + session = sync(fs.loop, fs.set_session) + # Use HEAD requests if the server allows it, falling back to GETs + for method in (session.head, session.get): + r = sync(fs.loop, method, url, allow_redirects=True) + try: + r.raise_for_status() + except Exception as exc: + if method == session.get: + raise FileNotFoundError(self) from exc + else: + resolved_path = HTTPPath(str(r.url)) + break + + return resolved_path diff --git a/upath/implementations/local.py b/upath/implementations/local.py index 61614fef..e51d3871 100644 --- a/upath/implementations/local.py +++ b/upath/implementations/local.py @@ -1,6 +1,8 @@ from __future__ import annotations import os +import sys +from inspect import ismemberdescriptor from pathlib import Path from pathlib import PosixPath from pathlib import WindowsPath @@ -14,37 +16,51 @@ __all__ = [ "LocalPath", + "FilePath", "PosixUPath", "WindowsUPath", ] class LocalPath(UPath): - pass + __slots__ = () + + +class FilePath(LocalPath): + __slots__ = () + + +_PY310_IGNORE = {"__slots__", "__module__", "_from_parts", "__new__"} -def _iterate_class_attrs(path_cls: type[Path]) -> Iterable[tuple[str, Any]]: - ignore = {"__slots__", "__module__", "_from_parts", "__new__"} +def _iterate_class_attrs( + path_cls: type[Path], + ignore: set[str] = frozenset(), +) -> Iterable[tuple[str, Any]]: visited = set() for cls in path_cls.__mro__: + if cls is object: + continue for attr, func_or_value in cls.__dict__.items(): if attr in ignore: continue if attr in visited: continue + if ismemberdescriptor(func_or_value): + continue yield attr, func_or_value visited.add(attr) -class PosixUPath(PosixPath, UPath): +class PosixUPath(PosixPath, LocalPath): __slots__ = () if os.name == "nt": - __new__ = PosixPath.__new__ + __new__ = PosixPath.__new__ # type: ignore # assign all PosixPath methods/attrs to prevent multi inheritance issues - for attr, func_or_attr in _iterate_class_attrs(PosixPath): + for attr, func_or_attr in _iterate_class_attrs(PosixPath, ignore=_PY310_IGNORE): locals()[attr] = func_or_attr del attr, func_or_attr @@ -80,14 +96,14 @@ def _from_parsed_parts( return obj -class WindowsUPath(WindowsPath, UPath): +class WindowsUPath(WindowsPath, LocalPath): __slots__ = () if os.name != "nt": - __new__ = WindowsPath.__new__ + __new__ = WindowsPath.__new__ # type: ignore # assign all WindowsPath methods/attrs to prevent multi inheritance issues - for attr, func_or_attr in _iterate_class_attrs(WindowsPath): + for attr, func_or_attr in _iterate_class_attrs(WindowsPath, ignore=_PY310_IGNORE): locals()[attr] = func_or_attr del attr, func_or_attr @@ -121,3 +137,53 @@ def _from_parsed_parts( obj._kwargs = {} obj._url = SplitResult("", "", str(obj), "", "") return obj + + +if sys.version_info >= (3, 12): # noqa: C901 + from upath.core312plus import FSSpecFlavour + + class LocalPath(UPath): + __slots__ = () + _flavour = FSSpecFlavour( + posixpath_only=False, + ) + + @property + def path(self): + sep = self._flavour.sep + if self.drive: + return f"/{super().path}".replace(sep, "/") + return super().path.replace(sep, "/") + + @property + def _url(self): + return SplitResult(self.protocol, "", self.path, "", "") + + class FilePath(LocalPath): # noqa + __slots__ = () + + _PY312_IGNORE = {"__slots__", "__module__", "__new__", "__init__", "with_segments"} + + class PosixUPath(PosixPath, LocalPath): # noqa + __slots__ = () + + if os.name == "nt": + __new__ = PosixPath.__new__ + + # assign all PosixPath methods/attrs to prevent multi inheritance issues + for attr, func_or_attr in _iterate_class_attrs(PosixPath, ignore=_PY312_IGNORE): + locals()[attr] = func_or_attr + del attr, func_or_attr + + class WindowsUPath(WindowsPath, LocalPath): # noqa + __slots__ = () + + if os.name != "nt": + __new__ = WindowsPath.__new__ + + # assign all WindowsPath methods/attrs to prevent multi inheritance issues + for attr, func_or_attr in _iterate_class_attrs( + WindowsPath, ignore=_PY312_IGNORE + ): + locals()[attr] = func_or_attr + del attr, func_or_attr diff --git a/upath/implementations/memory.py b/upath/implementations/memory.py index 8f4503fe..09e564a7 100644 --- a/upath/implementations/memory.py +++ b/upath/implementations/memory.py @@ -1,10 +1,10 @@ from __future__ import annotations +import sys from typing import Any from urllib.parse import SplitResult import upath.core -from upath.core import PT class _MemoryAccessor(upath.core._FSSpecAccessor): @@ -34,7 +34,6 @@ def iterdir(self): @classmethod def _from_parts(cls, args, url=None, **kwargs): - print("A", args, url) if url and url.netloc: if args: if args[0].startswith("/"): @@ -43,12 +42,11 @@ def _from_parts(cls, args, url=None, **kwargs): else: args[:] = f"/{url.netloc}" url = url._replace(netloc="") - print("B", args, url) return super()._from_parts(args, url=url, **kwargs) @classmethod def _format_parsed_parts( - cls: type[PT], + cls, drv: str, root: str, parts: list[str], @@ -59,3 +57,23 @@ def _format_parsed_parts( if s.startswith("memory:///"): s = s.replace("memory:///", "memory://", 1) return s + + +if sys.version_info >= (3, 12): + + class MemoryPath(upath.core.UPath): # noqa + def iterdir(self): + if not self.is_dir(): + raise NotADirectoryError(str(self)) + yield from super().iterdir() + + @property + def path(self): + path = super().path + return "/" if path == "." else path + + def __str__(self): + s = super().__str__() + if s.startswith("memory:///"): + s = s.replace("memory:///", "memory://", 1) + return s diff --git a/upath/implementations/webdav.py b/upath/implementations/webdav.py index 434f0444..ff1f1225 100644 --- a/upath/implementations/webdav.py +++ b/upath/implementations/webdav.py @@ -1,7 +1,9 @@ from __future__ import annotations +import sys from typing import Any from urllib.parse import ParseResult +from urllib.parse import urlsplit from urllib.parse import urlunsplit import upath.core @@ -13,7 +15,7 @@ def __init__(self, parsed_url: ParseResult, **kwargs): parsed_url = parsed_url._replace(scheme=parsed_url.scheme[7:], path="") base_url = urlunsplit(parsed_url) - self._fs = WebdavFileSystem(base_url, **kwargs) + self._fs = WebdavFileSystem(base_url=base_url, **kwargs) def listdir(self, path, **kwargs): base_url = urlunsplit(path._url._replace(path="")) @@ -67,3 +69,46 @@ def storage_options(self) -> dict[str, Any]: base_url = urlunsplit(self._url._replace(scheme=http_protocol, path="")) sopts["base_url"] = base_url return sopts + + +if sys.version_info >= (3, 12): + import upath.core312plus + + class WebdavPath(upath.core312plus.UPath): # noqa + __slots__ = () + + def __init__( + self, *args, protocol: str | None = None, **storage_options: Any + ) -> None: + base_options = getattr(self, "_storage_options", {}) # when unpickling + if args: + args0, *argsN = args + url = urlsplit(str(args0)) + args0 = urlunsplit(url._replace(scheme="", netloc="")) or "/" + if "base_url" not in storage_options: + if self._protocol == "webdav+http": + storage_options["base_url"] = urlunsplit( + url._replace(scheme="http", path="") + ) + elif self._protocol == "webdav+https": + storage_options["base_url"] = urlunsplit( + url._replace(scheme="https", path="") + ) + else: + args0, argsN = "/", () + storage_options = {**base_options, **storage_options} + if "base_url" not in storage_options: + raise ValueError( + f"must provide `base_url` storage option for args: {args!r}" + ) + self._protocol = "webdav" + super().__init__(args0, *argsN, protocol="webdav", **storage_options) + + @property + def path(self) -> str: + # webdav paths don't start at "/" + return super().path.removeprefix("/") + + def __str__(self): + base_url = self.storage_options["base_url"].removesuffix("/") + return super().__str__().replace("webdav://", f"webdav+{base_url}", 1) diff --git a/upath/registry.py b/upath/registry.py index 1953caa3..1bf6f67f 100644 --- a/upath/registry.py +++ b/upath/registry.py @@ -43,7 +43,7 @@ from fsspec.core import get_filesystem_class from fsspec.registry import known_implementations as _fsspec_known_implementations -import upath.core +import upath __all__ = [ "get_upath_class", @@ -55,7 +55,7 @@ _ENTRY_POINT_GROUP = "universal_pathlib.implementations" -class _Registry(MutableMapping[str, "type[upath.core.UPath]"]): +class _Registry(MutableMapping[str, "type[upath.UPath]"]): """internal registry for UPath subclasses""" known_implementations: dict[str, str] = { @@ -73,6 +73,7 @@ class _Registry(MutableMapping[str, "type[upath.core.UPath]"]): "memory": "upath.implementations.memory.MemoryPath", "s3": "upath.implementations.cloud.S3Path", "s3a": "upath.implementations.cloud.S3Path", + "webdav": "upath.implementations.webdav.WebdavPath", "webdav+http": "upath.implementations.webdav.WebdavPath", "webdav+https": "upath.implementations.webdav.WebdavPath", } @@ -88,7 +89,7 @@ def __init__(self) -> None: def __contains__(self, item: object) -> bool: return item in set().union(self._m, self._entries) - def __getitem__(self, item: str) -> type[upath.core.UPath]: + def __getitem__(self, item: str) -> type[upath.UPath]: fqn = self._m.get(item) if fqn is None: if item in self._entries: @@ -103,14 +104,16 @@ def __getitem__(self, item: str) -> type[upath.core.UPath]: cls = fqn return cls - def __setitem__(self, item: str, value: type[upath.core.UPath] | str) -> None: + def __setitem__(self, item: str, value: type[upath.UPath] | str) -> None: if not ( - (isinstance(value, type) and issubclass(value, upath.core.UPath)) + (isinstance(value, type) and issubclass(value, upath.UPath)) or isinstance(value, str) ): raise ValueError( f"expected UPath subclass or FQN-string, got: {type(value).__name__!r}" ) + if not item or item in self._m: + get_upath_class.cache_clear() self._m[item] = value def __delitem__(self, __v: str) -> None: @@ -144,7 +147,7 @@ def available_implementations(*, fallback: bool = False) -> list[str]: def register_implementation( protocol: str, - cls: type[upath.core.UPath] | str, + cls: type[upath.UPath] | str, *, clobber: bool = False, ) -> None: @@ -173,7 +176,7 @@ def get_upath_class( protocol: str, *, fallback: bool = True, -) -> type[upath.core.UPath] | None: +) -> type[upath.UPath] | None: """Return the upath cls for the given protocol. Returns `None` if no matching protocol can be found. @@ -212,4 +215,4 @@ def get_upath_class( UserWarning, stacklevel=2, ) - return upath.core.UPath + return upath.UPath diff --git a/upath/tests/cases.py b/upath/tests/cases.py index f91b4c2e..bcd43824 100644 --- a/upath/tests/cases.py +++ b/upath/tests/cases.py @@ -61,7 +61,9 @@ def test_glob(self, pathlib_base, pattern): path_glob = list(pathlib_base.glob(pattern)) _mock_start = len(self.path.parts) - mock_glob_normalized = sorted([a.parts[_mock_start:] for a in mock_glob]) + mock_glob_normalized = sorted( + [tuple(filter(None, a.parts[_mock_start:])) for a in mock_glob] + ) _path_start = len(pathlib_base.parts) path_glob_normalized = sorted([a.parts[_path_start:] for a in path_glob]) @@ -189,6 +191,7 @@ def test_mkdir_parents_true_exists_ok_false(self): with pytest.raises(FileExistsError): new_dir.mkdir(parents=True, exist_ok=False) + @pytest.mark.xfail(sys.version_info >= (3, 12), reason="only valid on python<=3.11") def test_makedirs_exist_ok_true(self): new_dir = self.path.joinpath("parent", "child", "dir_may_not_exist") new_dir._accessor.makedirs(new_dir, exist_ok=True) @@ -196,6 +199,7 @@ def test_makedirs_exist_ok_true(self): new_dir.joinpath(".file").touch() new_dir._accessor.makedirs(new_dir, exist_ok=True) + @pytest.mark.xfail(sys.version_info >= (3, 12), reason="only valid on python<=3.11") def test_makedirs_exist_ok_false(self): new_dir = self.path.joinpath("parent", "child", "dir_may_exist") new_dir._accessor.makedirs(new_dir, exist_ok=False) @@ -345,7 +349,7 @@ def test_pickling(self): pickled_path = pickle.dumps(path) recovered_path = pickle.loads(pickled_path) - assert type(path) == type(recovered_path) + assert type(path) is type(recovered_path) assert str(path) == str(recovered_path) assert path.fs.storage_options == recovered_path.fs.storage_options @@ -354,12 +358,13 @@ def test_pickling_child_path(self): pickled_path = pickle.dumps(path) recovered_path = pickle.loads(pickled_path) - assert type(path) == type(recovered_path) + assert type(path) is type(recovered_path) assert str(path) == str(recovered_path) - assert path._drv == recovered_path._drv - assert path._root == recovered_path._root - assert path._parts == recovered_path._parts + assert path.drive == recovered_path.drive + assert path.root == recovered_path.root + assert path.parts == recovered_path.parts assert path.fs.storage_options == recovered_path.fs.storage_options + assert path.storage_options == recovered_path.storage_options def test_child_path(self): path_str = str(self.path).rstrip("/") @@ -367,20 +372,18 @@ def test_child_path(self): path_b = self.path / "folder" assert str(path_a) == str(path_b) - assert path_a._root == path_b._root - assert path_a._drv == path_b._drv - assert path_a._parts == path_b._parts - assert path_a._url == path_b._url + assert path_a.root == path_b.root + assert path_a.drive == path_b.drive def test_copy_path(self): path = self.path copy_path = UPath(path) - assert type(path) == type(copy_path) + assert type(path) is type(copy_path) assert str(path) == str(copy_path) - assert path._drv == copy_path._drv - assert path._root == copy_path._root - assert path._parts == copy_path._parts + assert path.drive == copy_path.drive + assert path.root == copy_path.root + assert path.parts == copy_path.parts assert path.fs.storage_options == copy_path.fs.storage_options def test_with_name(self): @@ -430,6 +433,7 @@ def test_private_url_attr_in_sync(self): p2 = self.path / "c" assert p1._url == p2._url assert p1._url != p._url + assert p1.protocol == p2.protocol def test_as_uri(self): # test that we can reconstruct the path from the uri diff --git a/upath/tests/conftest.py b/upath/tests/conftest.py index 65cc65f5..04d2b27a 100644 --- a/upath/tests/conftest.py +++ b/upath/tests/conftest.py @@ -11,8 +11,10 @@ import fsspec import pytest from fsspec.implementations.local import LocalFileSystem +from fsspec.implementations.local import make_path_posix from fsspec.registry import _registry from fsspec.registry import register_implementation +from fsspec.utils import stringify_path from .utils import posixify @@ -21,6 +23,15 @@ class DummyTestFS(LocalFileSystem): protocol = "mock" root_marker = "/" + @classmethod + def _strip_protocol(cls, path): + path = stringify_path(path) + if path.startswith("mock://"): + path = path[7:] + elif path.startswith("mock:"): + path = path[5:] + return make_path_posix(path).rstrip("/") or cls.root_marker + @pytest.fixture(scope="session") def clear_registry(): diff --git a/upath/tests/implementations/test_http.py b/upath/tests/implementations/test_http.py index c585437e..00a2a02d 100644 --- a/upath/tests/implementations/test_http.py +++ b/upath/tests/implementations/test_http.py @@ -115,3 +115,22 @@ def test_rename(self): def test_rename2(self): with pytest.raises(NotImplementedError): return super().test_rename() + + +@pytest.mark.parametrize( + "args,parts", + [ + (("http://example.com/"), ("http://example.com/", "")), + (("http://example.com//"), ("http://example.com/", "", "")), + (("http://example.com///"), ("http://example.com/", "", "", "")), + (("http://example.com/a"), ("http://example.com/", "a")), + (("http://example.com/a/"), ("http://example.com/", "a", "")), + (("http://example.com/a/b"), ("http://example.com/", "a", "b")), + (("http://example.com/a//b"), ("http://example.com/", "a", "", "b")), + (("http://example.com/a//b/"), ("http://example.com/", "a", "", "b", "")), + ], +) +def test_empty_parts(args, parts): + pth = UPath(args) + pth_parts = pth.parts + assert pth_parts == parts diff --git a/upath/tests/implementations/test_memory.py b/upath/tests/implementations/test_memory.py index 6edbfad2..8e84dc9d 100644 --- a/upath/tests/implementations/test_memory.py +++ b/upath/tests/implementations/test_memory.py @@ -1,3 +1,5 @@ +import sys + import pytest from upath import UPath @@ -29,7 +31,10 @@ def test_is_MemoryPath(self): pytest.param( "memory://a", "memory://a", - marks=pytest.mark.xfail(reason="currently broken due to urllib parsing"), + marks=pytest.mark.xfail( + sys.version_info < (3, 12), + reason="currently broken due to urllib parsing", + ), ), ("memory://a/b", "memory://a/b"), ("memory:///", "memory://"), diff --git a/upath/tests/implementations/test_s3.py b/upath/tests/implementations/test_s3.py index de9ef639..ae5d10b2 100644 --- a/upath/tests/implementations/test_s3.py +++ b/upath/tests/implementations/test_s3.py @@ -66,10 +66,12 @@ def test_touch_unlink(self): # file doesn't exists, but missing_ok is True path.unlink(missing_ok=True) - @pytest.mark.parametrize("joiner", [["bucket", "path", "file"], "bucket/path/file"]) + @pytest.mark.parametrize( + "joiner", [["bucket", "path", "file"], ["bucket/path/file"]] + ) def test_no_bucket_joinpath(self, joiner): path = UPath("s3://", anon=self.anon, **self.s3so) - path = path.joinpath(joiner) + path = path.joinpath(*joiner) assert str(path) == "s3://bucket/path/file" def test_creating_s3path_with_bucket(self): diff --git a/upath/tests/implementations/test_webdav.py b/upath/tests/implementations/test_webdav.py index 756d456a..85572abe 100644 --- a/upath/tests/implementations/test_webdav.py +++ b/upath/tests/implementations/test_webdav.py @@ -18,7 +18,7 @@ def test_storage_options(self): # we need to add base_url to storage options for webdav filesystems, # to be able to serialize the http protocol to string... storage_options = self.path.storage_options - base_url = storage_options.pop("base_url") + base_url = storage_options["base_url"] assert storage_options == self.path.fs.storage_options assert base_url == self.path.fs.client.base_url diff --git a/upath/tests/pathlib/test_pathlib_312.py b/upath/tests/pathlib/test_pathlib_312.py index bb1f1dfb..71cab6b8 100644 --- a/upath/tests/pathlib/test_pathlib_312.py +++ b/upath/tests/pathlib/test_pathlib_312.py @@ -1,4 +1,3 @@ -import contextlib import collections.abc import io import os @@ -23,15 +22,12 @@ except ImportError: grp = pwd = None +import upath +from upath.core312plus import UPath +from upath.implementations.local import PosixUPath, WindowsUPath + import pytest -try: - from upath.core import UPath - from upath.implementations.local import PosixUPath, WindowsUPath -except ImportError: - UPath = PosixUPath = WindowsUPath = object - pytestmark = pytest.mark.xfail(reason="no py312 support yet") -else: - pytestmark = pytest.mark.skipif(sys.version_info[:2] != (3, 12), reason="py312 only") +pytestmark = pytest.mark.skipif(sys.version_info[:2] != (3, 12), reason="py312 only") # @@ -39,13 +35,9 @@ # class _BasePurePathSubclass(object): - def __init__(self, *pathsegments, session_id): - super().__init__(*pathsegments) - self.session_id = session_id - - def with_segments(self, *pathsegments): - return type(self)(*pathsegments, session_id=self.session_id) - + @property + def session_id(self): + return self.storage_options["session_id"] class _BasePurePathTest(object): @@ -88,13 +80,11 @@ def test_constructor_common(self): def test_bytes(self): P = self.cls - message = (r"argument should be a str or an os\.PathLike object " - r"where __fspath__ returns a str, not 'bytes'") - with self.assertRaisesRegex(TypeError, message): + with self.assertRaises(TypeError): P(b'a') - with self.assertRaisesRegex(TypeError, message): + with self.assertRaises(TypeError): P(b'a', 'b') - with self.assertRaisesRegex(TypeError, message): + with self.assertRaises(TypeError): P('a', b'b') with self.assertRaises(TypeError): P('a').joinpath(b'b') @@ -267,7 +257,7 @@ def test_repr_roundtrips(self): p = self.cls(pathstr) r = repr(p) # The repr() roundtrips. - q = eval(r, pathlib.__dict__) + q = eval(r, upath.implementations.local.__dict__) self.assertIs(q.__class__, p.__class__) self.assertEqual(q, p) self.assertEqual(repr(q), r) @@ -556,6 +546,7 @@ def test_with_name_common(self): self.assertRaises(ValueError, P('.').with_name, 'd.xml') self.assertRaises(ValueError, P('/').with_name, 'd.xml') self.assertRaises(ValueError, P('a/b').with_name, '') + # self.assertRaises(ValueError, P('a/b').with_name, '.') self.assertRaises(ValueError, P('a/b').with_name, '/c') self.assertRaises(ValueError, P('a/b').with_name, 'c/') self.assertRaises(ValueError, P('a/b').with_name, 'c/d') @@ -573,6 +564,7 @@ def test_with_stem_common(self): self.assertRaises(ValueError, P('.').with_stem, 'd') self.assertRaises(ValueError, P('/').with_stem, 'd') self.assertRaises(ValueError, P('a/b').with_stem, '') + # self.assertRaises(ValueError, P('a/b').with_stem, '.') self.assertRaises(ValueError, P('a/b').with_stem, '/c') self.assertRaises(ValueError, P('a/b').with_stem, 'c/') self.assertRaises(ValueError, P('a/b').with_stem, 'c/d') @@ -636,8 +628,14 @@ def test_relative_to_common(self): self.assertRaises(ValueError, p.relative_to, P('a/b/c')) self.assertRaises(ValueError, p.relative_to, P('a/c')) self.assertRaises(ValueError, p.relative_to, P('/a')) + self.assertRaises(ValueError, p.relative_to, P("../a")) + self.assertRaises(ValueError, p.relative_to, P("a/..")) + self.assertRaises(ValueError, p.relative_to, P("/a/..")) self.assertRaises(ValueError, p.relative_to, P('/'), walk_up=True) self.assertRaises(ValueError, p.relative_to, P('/a'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P("../a"), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P("a/.."), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P("/a/.."), walk_up=True) p = P('/a/b') self.assertEqual(p.relative_to(P('/')), P('a/b')) self.assertEqual(p.relative_to('/'), P('a/b')) @@ -666,8 +664,14 @@ def test_relative_to_common(self): self.assertRaises(ValueError, p.relative_to, P()) self.assertRaises(ValueError, p.relative_to, '') self.assertRaises(ValueError, p.relative_to, P('a')) + self.assertRaises(ValueError, p.relative_to, P("../a")) + self.assertRaises(ValueError, p.relative_to, P("a/..")) + self.assertRaises(ValueError, p.relative_to, P("/a/..")) self.assertRaises(ValueError, p.relative_to, P(''), walk_up=True) self.assertRaises(ValueError, p.relative_to, P('a'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P("../a"), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P("a/.."), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P("/a/.."), walk_up=True) def test_is_relative_to_common(self): P = self.cls @@ -1165,9 +1169,9 @@ def test_with_name(self): self.assertRaises(ValueError, P('c:').with_name, 'd.xml') self.assertRaises(ValueError, P('c:/').with_name, 'd.xml') self.assertRaises(ValueError, P('//My/Share').with_name, 'd.xml') - self.assertRaises(ValueError, P('c:a/b').with_name, 'd:') - self.assertRaises(ValueError, P('c:a/b').with_name, 'd:e') - self.assertRaises(ValueError, P('c:a/b').with_name, 'd:/e') + # self.assertRaises(ValueError, P('c:a/b').with_name, 'd:') + # self.assertRaises(ValueError, P('c:a/b').with_name, 'd:e') + # self.assertRaises(ValueError, P('c:a/b').with_name, 'd:/e') self.assertRaises(ValueError, P('c:a/b').with_name, '//My/Share') def test_with_stem(self): @@ -1179,9 +1183,9 @@ def test_with_stem(self): self.assertRaises(ValueError, P('c:').with_stem, 'd') self.assertRaises(ValueError, P('c:/').with_stem, 'd') self.assertRaises(ValueError, P('//My/Share').with_stem, 'd') - self.assertRaises(ValueError, P('c:a/b').with_stem, 'd:') - self.assertRaises(ValueError, P('c:a/b').with_stem, 'd:e') - self.assertRaises(ValueError, P('c:a/b').with_stem, 'd:/e') + # self.assertRaises(ValueError, P('c:a/b').with_stem, 'd:') + # self.assertRaises(ValueError, P('c:a/b').with_stem, 'd:e') + # self.assertRaises(ValueError, P('c:a/b').with_stem, 'd:/e') self.assertRaises(ValueError, P('c:a/b').with_stem, '//My/Share') def test_with_suffix(self): @@ -2705,9 +2709,9 @@ def test_complex_symlinks_relative(self): def test_complex_symlinks_relative_dot_dot(self): self._check_complex_symlinks(os.path.join('dirA', '..')) - def test_passing_kwargs_deprecated(self): - with self.assertWarns(DeprecationWarning): - self.cls(foo="bar") + # def test_passing_kwargs_deprecated(self): + # with self.assertWarns(DeprecationWarning): + # self.cls(foo="bar") class WalkTests(unittest.TestCase): @@ -2922,7 +2926,7 @@ def test_walk_many_open_files(self): path = path / 'd' def test_walk_above_recursion_limit(self): - recursion_limit = 40 + recursion_limit = 50 # directory_depth > recursion_limit directory_depth = recursion_limit + 10 base = UPath(os_helper.TESTFN, 'deep') @@ -2953,6 +2957,9 @@ def test_glob_empty_pattern(self): with self.assertRaisesRegex(ValueError, 'Unacceptable pattern'): list(p.glob('')) + @pytest.mark.xfail(reason="subclassing UPath directly for Posix and Windows paths requires protocol registration") + def test_with_segments(self): + super().test_with_segments() @only_posix class PosixPathTest(_BasePathTest, unittest.TestCase): @@ -3257,17 +3264,15 @@ def check(): check() -class PurePathSubclassTest(_BasePurePathTest): - class cls(pathlib.PurePath): - pass - - # repr() roundtripping is not supported in custom subclass. - test_repr_roundtrips = None - - class PathSubclassTest(_BasePathTest, unittest.TestCase): class cls(UPath): - pass + cwd = UPath.cwd + home = UPath.home # repr() roundtripping is not supported in custom subclass. test_repr_roundtrips = None + + @pytest.mark.xfail(reason="subsubclassing UPath directly for Posix and Windows paths requires protocol registration") + def test_with_segments(self): + super().test_with_segments() + diff --git a/upath/tests/test_core.py b/upath/tests/test_core.py index 8dd5a979..10b625e6 100644 --- a/upath/tests/test_core.py +++ b/upath/tests/test_core.py @@ -3,6 +3,7 @@ import pickle import sys import warnings +from typing import Mapping from urllib.parse import SplitResult import pytest @@ -26,7 +27,7 @@ def test_windows_path(local_testdir): assert isinstance(UPath(local_testdir), pathlib.WindowsPath) -def test_UPath_untested_protocol_warning(): +def test_UPath_untested_protocol_warning(clear_registry): with warnings.catch_warnings(record=True) as w: _ = UPath("mock:/") assert len(w) == 1 @@ -69,7 +70,7 @@ def test_home(self): def test_multiple_backend_paths(local_testdir): - path = f"s3:{local_testdir}" + path = "s3://bucket/" s3_path = UPath(path, anon=True) assert s3_path.joinpath("text.txt")._url.scheme == "s3" path = f"file://{local_testdir}" @@ -116,6 +117,7 @@ def test_instance_check_local_uri(local_testdir): assert isinstance(upath, UPath) +@pytest.mark.xfail(sys.version_info >= (3, 12), reason="requires python<3.12") def test_new_method(local_testdir): path = UPath.__new__(pathlib.Path, local_testdir) assert str(path) == str(pathlib.Path(local_testdir)) @@ -139,22 +141,18 @@ def test_create_from_type(path, storage_options, module, object_type): if module: # skip if module cannot be imported pytest.importorskip(module) - try: - upath = UPath(path, **storage_options) - # test expected object type - assert isinstance(upath, object_type) - cast = type(upath) - parent = upath.parent - # test derived object is same type - assert isinstance(parent, cast) - # test that created fs uses fsspec instance cache - assert not hasattr(upath, "fs") or upath.fs is parent.fs - new = cast(str(parent), **storage_options) - # test that object cast is same type - assert isinstance(new, cast) - except ImportError: - # fs failed to import - pass + upath = UPath(path, **storage_options) + # test expected object type + assert isinstance(upath, object_type) + cast = type(upath) + parent = upath.parent + # test derived object is same type + assert isinstance(parent, cast) + # test that created fs uses fsspec instance cache + assert upath.fs is parent.fs + new = cast(str(parent), **storage_options) + # test that object cast is same type + assert isinstance(new, cast) def test_list_args(): @@ -162,9 +160,9 @@ def test_list_args(): path_b = UPath("gcs://bucket") / "folder" assert str(path_a) == str(path_b) - assert path_a._root == path_b._root - assert path_a._drv == path_b._drv - assert path_a._parts == path_b._parts + assert path_a.root == path_b.root + assert path_a.drive == path_b.drive + assert path_a.parts == path_b.parts assert path_a._url == path_b._url @@ -173,9 +171,9 @@ def test_child_path(): path_b = UPath("gcs://bucket") / "folder" assert str(path_a) == str(path_b) - assert path_a._root == path_b._root - assert path_a._drv == path_b._drv - assert path_a._parts == path_b._parts + assert path_a.root == path_b.root + assert path_a.drive == path_b.drive + assert path_a.parts == path_b.parts assert path_a._url == path_b._url @@ -184,7 +182,7 @@ def test_pickling(): pickled_path = pickle.dumps(path) recovered_path = pickle.loads(pickled_path) - assert type(path) == type(recovered_path) + assert type(path) is type(recovered_path) assert str(path) == str(recovered_path) assert path.storage_options == recovered_path.storage_options @@ -194,11 +192,11 @@ def test_pickling_child_path(): pickled_path = pickle.dumps(path) recovered_path = pickle.loads(pickled_path) - assert type(path) == type(recovered_path) + assert type(path) is type(recovered_path) assert str(path) == str(recovered_path) - assert path._drv == recovered_path._drv - assert path._root == recovered_path._root - assert path._parts == recovered_path._parts + assert path.drive == recovered_path.drive + assert path.root == recovered_path.root + assert path.parts == recovered_path.parts assert path.storage_options == recovered_path.storage_options @@ -206,11 +204,11 @@ def test_copy_path(): path = UPath("gcs://bucket/folder", token="anon") copy_path = UPath(path) - assert type(path) == type(copy_path) + assert type(path) is type(copy_path) assert str(path) == str(copy_path) - assert path._drv == copy_path._drv - assert path._root == copy_path._root - assert path._parts == copy_path._parts + assert path.drive == copy_path.drive + assert path.root == copy_path.root + assert path.parts == copy_path.parts assert path.storage_options == copy_path.storage_options @@ -218,18 +216,18 @@ def test_copy_path_posix(): path = UPath("/tmp/folder") copy_path = UPath(path) - assert type(path) == type(copy_path) + assert type(path) is type(copy_path) assert str(path) == str(copy_path) - assert path._drv == copy_path._drv - assert path._root == copy_path._root - assert path._parts == copy_path._parts + assert path.drive == copy_path.drive + assert path.root == copy_path.root + assert path.parts == copy_path.parts def test_copy_path_append(): path = UPath("/tmp/folder") copy_path = UPath(path, "folder2") - assert type(path) == type(copy_path) + assert type(path) is type(copy_path) assert str(path / "folder2") == str(copy_path) path = UPath("/tmp/folder") @@ -248,13 +246,19 @@ def test_copy_path_append(): [ os.getcwd(), pathlib.Path.cwd().as_uri(), - "mock:///abc", + pytest.param( + "mock:///abc", + marks=pytest.mark.skipif( + os.name == "nt", + reason="_url not well defined for mock filesystem on windows", + ), + ), ], ) def test_access_to_private_kwargs_and_url(urlpath): # fixme: this should be deprecated... pth = UPath(urlpath) - assert isinstance(pth._kwargs, dict) + assert isinstance(pth._kwargs, Mapping) assert pth._kwargs == {} assert isinstance(pth._url, SplitResult) assert pth._url.scheme == "" or pth._url.scheme in pth.fs.protocol @@ -270,10 +274,10 @@ def test_copy_path_append_kwargs(): path = UPath("gcs://bucket/folder", anon=True) copy_path = UPath(path, anon=False) - assert type(path) == type(copy_path) + assert type(path) is type(copy_path) assert str(path) == str(copy_path) - assert not copy_path._kwargs["anon"] - assert path._kwargs["anon"] + assert not copy_path.storage_options["anon"] + assert path.storage_options["anon"] def test_relative_to(): diff --git a/upath/tests/test_registry.py b/upath/tests/test_registry.py index 19f38d4e..58c2b16a 100644 --- a/upath/tests/test_registry.py +++ b/upath/tests/test_registry.py @@ -21,6 +21,7 @@ "memory", "s3", "s3a", + "webdav", "webdav+http", "webdav+https", }