diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 56ff79f..68d0397 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -49,6 +49,24 @@ jobs: - name: Run tests run: nox -s tests-${{ matrix.fsspec || matrix.pyv }} -- --cov-report=xml + typesafety: + runs-on: ubuntu-latest + + steps: + - name: Check out the repository + uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.pyv }} + uses: actions/setup-python@v4 + with: + python-version: '3.8' + + - name: Install nox + run: python -m pip install --upgrade nox + + - name: Run typesafety checks + run: nox -s typesafety + lint: runs-on: ubuntu-latest diff --git a/dev/generate_flavours.py b/dev/generate_flavours.py index f37780b..34caece 100644 --- a/dev/generate_flavours.py +++ b/dev/generate_flavours.py @@ -43,6 +43,7 @@ import logging import re from typing import Any +from typing import Literal from typing import cast from urllib.parse import parse_qs from urllib.parse import urlsplit @@ -67,6 +68,22 @@ class FileSystemFlavourBase: """base class for the fsspec flavours""" + protocol: str | tuple[str, ...] + root_marker: Literal["/", ""] + sep: Literal["/"] + + @classmethod + def _strip_protocol(cls, path): + raise NotImplementedError + + @staticmethod + def _get_kwargs_from_urls(path): + raise NotImplementedError + + @classmethod + def _parent(cls, path): + raise NotImplementedError + def __init_subclass__(cls: Any, **kwargs): if isinstance(cls.protocol, str): protocols = (cls.protocol,) @@ -99,12 +116,27 @@ def __init_subclass__(cls: Any, **kwargs): } +def _fix_abstract_file_system(x: str) -> str: + x = re.sub( + "protocol = 'abstract'", "protocol: str | tuple[str, ...] 
= 'abstract'", x + ) + x = re.sub("root_marker = ''", "root_marker: Literal['', '/'] = ''", x) + x = re.sub("sep = '/'", "sep: Literal['/'] = '/'", x) + return x + + def _fix_azure_blob_file_system(x: str) -> str: - return re.sub( - r"host = ops.get\(\"host\", None\)", - 'host: str | None = ops.get("host", None)', + x = re.sub( + r"if isinstance\(path, list\):", + "if isinstance(path, list): # type: ignore[unreachable]", x, ) + x = re.sub( + r"(return \[.*\])", + r"\1 # type: ignore[unreachable]", + x, + ) + return x def _fix_memfs_file_system(x: str) -> str: @@ -115,6 +147,15 @@ def _fix_memfs_file_system(x: str) -> str: ) +def _fix_oss_file_system(x: str) -> str: + x = re.sub( + r"path_string: str = stringify_path\(path\)", + "path_string = stringify_path(path)", + x, + ) + return x + + def _fix_xrootd_file_system(x: str) -> str: x = re.sub( r"client.URL", @@ -129,8 +170,10 @@ def _fix_xrootd_file_system(x: str) -> str: FIX_SOURCE = { + "AbstractFileSystem": _fix_abstract_file_system, "AzureBlobFileSystem": _fix_azure_blob_file_system, "MemFS": _fix_memfs_file_system, + "OSSFileSystem": _fix_oss_file_system, "XRootDFileSystem": _fix_xrootd_file_system, } @@ -303,7 +346,7 @@ def create_source() -> str: AbstractFileSystem, ["_strip_protocol", "_get_kwargs_from_urls", "_parent"], {}, - ["protocol", "root_marker"], + ["protocol", "root_marker", "sep"], cls_suffix=BASE_CLASS_NAME_SUFFIX, base_cls="FileSystemFlavourBase", ) diff --git a/dev/requirements.txt b/dev/requirements.txt new file mode 100644 index 0000000..e5036ba --- /dev/null +++ b/dev/requirements.txt @@ -0,0 +1,18 @@ +fsspec[git,hdfs,dask,http,sftp,smb]==2024.2.0 + +# these dependencies define their own filesystems +adlfs==2024.2.0 +boxfs==0.2.1 +dropboxdrivefs==1.3.1 +gcsfs==2024.2.0 +s3fs==2024.2.0 +ocifs==1.3.1 +webdav4[fsspec]==0.9.8 +# gdrivefs @ git+https://github.com/fsspec/gdrivefs@master broken ... 
+morefs[asynclocalfs]==0.2.0 +dvc==3.47.0 +huggingface_hub==0.20.3 +lakefs-spec==0.7.0 +ossfs==2023.12.0 +fsspec-xrootd==0.2.4 +wandbfs==0.0.2 diff --git a/noxfile.py b/noxfile.py index 53a4eb5..88b1cea 100644 --- a/noxfile.py +++ b/noxfile.py @@ -91,6 +91,22 @@ def type_checking(session): session.run("python", "-m", "mypy") +@nox.session +def typesafety(session): + session.install("-e", ".[tests]") + session.run( + "python", + "-m", + "pytest", + "-v", + "-p", + "pytest-mypy-plugins", + "--mypy-pyproject-toml-file", + "pyproject.toml", + "typesafety", + ) + + @nox.session() def smoke(session): print("please tun `nox -s tests` instead") diff --git a/pyproject.toml b/pyproject.toml index efd5993..56c4c0c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ force_single_line = true line_length = 88 [tool.pytest.ini_options] -addopts = "-ra -m 'not hdfs'" +addopts = "-ra -m 'not hdfs' -p no:pytest-mypy-plugins" markers = [ "hdfs: mark test as hdfs", "pathlib: mark cpython pathlib tests", @@ -61,7 +61,7 @@ exclude_lines = [ [tool.mypy] # Error output -show_column_numbers = true +show_column_numbers = false show_error_codes = true show_error_context = true show_traceback = true diff --git a/setup.cfg b/setup.cfg index f3a3cfe..6d67430 100644 --- a/setup.cfg +++ b/setup.cfg @@ -24,7 +24,7 @@ python_requires = >=3.8 zip_safe = False packages = find: install_requires= - fsspec>=2022.1.0 + fsspec >=2022.1.0,!=2024.3.1 [options.extras_require] tests = @@ -33,7 +33,8 @@ tests = pytest-cov==4.1.0 pytest-mock==3.12.0 pylint==2.17.4 - mypy==1.8.0 + mypy==1.10.0 + pytest-mypy-plugins==3.1.2 packaging dev = %(tests)s diff --git a/typesafety/test_upath_interface.yml b/typesafety/test_upath_interface.yml new file mode 100644 index 0000000..219b3a4 --- /dev/null +++ b/typesafety/test_upath_interface.yml @@ -0,0 +1,567 @@ +- case: upath_constructor + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc")) # N: Revealed type is 
"upath.core.UPath" + +# === special upath attributes and methods ============================ + +- case: upath_special_protocol + disable_cache: false + main: | + from upath import UPath + + p = UPath("abc") + reveal_type(p.protocol) # N: Revealed type is "builtins.str" + +- case: upath_special_storage_options + disable_cache: false + main: | + from upath import UPath + + p = UPath("abc") + reveal_type(p.storage_options) # N: Revealed type is "typing.Mapping[builtins.str, Any]" + +- case: upath_special_path + disable_cache: false + main: | + from upath import UPath + + p = UPath("abc") + reveal_type(p.path) # N: Revealed type is "builtins.str" + +- case: upath_special_fs + disable_cache: false + main: | + from upath import UPath + + p = UPath("abc") + # todo: this can change once fsspec is typed + reveal_type(p.fs) # N: Revealed type is "Any" + +- case: upath_special_joinuri + disable_cache: false + main: | + from upath import UPath + + p = UPath("abc") + reveal_type(p.joinuri("efg")) # N: Revealed type is "upath.core.UPath" + +- case: upath_special__url + disable_cache: false + main: | + from upath import UPath + + p = UPath("abc") + reveal_type(p._url) # NR: Revealed type is "[Tt]uple\[builtins.str, builtins.str, builtins.str, builtins.str, builtins.str, fallback=urllib.parse.SplitResult\]" + +# === upath pathlib.PurePath interface ================================ + +- case: upath_parts + disable_cache: false + main: | + from upath import UPath + + p = UPath("abc") + reveal_type(p.parts) # N: Revealed type is "builtins.tuple[builtins.str, ...]" + +- case: upath_drive + disable_cache: false + main: | + from upath import UPath + + p = UPath("abc") + reveal_type(p.drive) # N: Revealed type is "builtins.str" + +- case: upath_root + disable_cache: false + main: | + from upath import UPath + + p = UPath("abc") + reveal_type(p.root) # N: Revealed type is "builtins.str" + +- case: upath_anchor + disable_cache: false + main: | + from upath import UPath + + p = 
UPath("abc") + reveal_type(p.anchor) # N: Revealed type is "builtins.str" + +- case: upath_name + disable_cache: false + main: | + from upath import UPath + + p = UPath("abc") + reveal_type(p.name) # N: Revealed type is "builtins.str" + +- case: upath_suffix + disable_cache: false + main: | + from upath import UPath + + p = UPath("abc") + reveal_type(p.suffix) # N: Revealed type is "builtins.str" + +- case: upath_suffixes + disable_cache: false + main: | + from upath import UPath + + p = UPath("abc") + reveal_type(p.suffixes) # N: Revealed type is "builtins.list[builtins.str]" + +- case: upath_stem + disable_cache: false + main: | + from upath import UPath + + p = UPath("abc") + reveal_type(p.stem) # N: Revealed type is "builtins.str" + +- case: upath_hashable + disable_cache: false + main: | + from upath import UPath + + p = UPath("abc") + reveal_type(hash(p)) # N: Revealed type is "builtins.int" + +# __fspath__ + +- case: upath_sortable + disable_cache: false + main: | + from upath import UPath + + a = UPath("abc") + b = UPath("efg") + reveal_type(a < b) # N: Revealed type is "builtins.bool" + +- case: upath_truediv + disable_cache: false + main: | + from upath import UPath + + a = UPath("abc") / "efg" + reveal_type(a) # N: Revealed type is "upath.core.UPath" + +- case: upath_rtruediv + disable_cache: false + main: | + from upath import UPath + + a = "efg" / UPath("abc") + reveal_type(a) # N: Revealed type is "upath.core.UPath" + +# __bytes__ + +- case: upath_as_posix + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("a").as_posix()) # N: Revealed type is "builtins.str" + +- case: upath_as_uri + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("a").as_uri()) # N: Revealed type is "builtins.str" + +- case: upath_is_absolute + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("a").is_absolute()) # N: Revealed type is "builtins.bool" + +- case: upath_is_reserved + 
disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("a").is_reserved()) # N: Revealed type is "builtins.bool" + +- case: upath_is_relative_to + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("a").is_relative_to("b")) # N: Revealed type is "builtins.bool" + +- case: upath_match + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("a").match("b")) # N: Revealed type is "builtins.bool" + +- case: upath_relative_to + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("a").relative_to("b")) # N: Revealed type is "upath.core.UPath" + +- case: upath_with_name + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("a").with_name("b")) # N: Revealed type is "upath.core.UPath" + +- case: upath_with_stem + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("a").with_stem("b")) # N: Revealed type is "upath.core.UPath" + +- case: upath_with_suffix + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("a").with_suffix("b")) # N: Revealed type is "upath.core.UPath" + +- case: upath_joinpath + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").joinpath("efg")) # N: Revealed type is "upath.core.UPath" + +- case: upath_parents + disable_cache: false + main: | + from upath import UPath + + p = UPath("abc") + reveal_type(p.parents) # N: Revealed type is "typing.Sequence[upath.core.UPath]" + +- case: upath_parent + disable_cache: false + main: | + from upath import UPath + + p = UPath("abc") + reveal_type(p.parent) # N: Revealed type is "upath.core.UPath" + +- case: upath_with_segments + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").with_segments("efg")) # N: Revealed type is "upath.core.UPath" + +# === upath pathlib.Path methods ====================================== + +- case: upath_cwd + disable_cache: false + main: 
| + from upath import UPath + + reveal_type(UPath.cwd()) # N: Revealed type is "upath.core.UPath" + +- case: upath_stat + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").stat()) # N: Revealed type is "upath._stat.UPathStatResult" + +- case: upath_chmod + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").chmod(0o777)) # N: Revealed type is "None" + +- case: upath_exists + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").exists()) # N: Revealed type is "builtins.bool" + +- case: upath_glob + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").glob("efg")) # N: Revealed type is "typing.Generator[upath.core.UPath, None, None]" + +- case: upath_rglob + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").rglob("efg")) # N: Revealed type is "typing.Generator[upath.core.UPath, None, None]" + +- case: upath_is_dir + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").is_dir()) # N: Revealed type is "builtins.bool" + +- case: upath_is_file + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").is_file()) # N: Revealed type is "builtins.bool" + +- case: upath_is_symlink + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").is_symlink()) # N: Revealed type is "builtins.bool" + +- case: upath_is_socket + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").is_socket()) # N: Revealed type is "builtins.bool" + +- case: upath_is_fifo + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").is_fifo()) # N: Revealed type is "builtins.bool" + +- case: upath_is_block_device + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").is_block_device()) # N: Revealed type is "builtins.bool" + +- case: upath_is_char_device 
+ disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").is_char_device()) # N: Revealed type is "builtins.bool" + +- case: upath_is_junction + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").is_junction()) # N: Revealed type is "builtins.bool" + +- case: upath_iterdir + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").iterdir()) # N: Revealed type is "typing.Generator[upath.core.UPath, None, None]" + +- case: upath_lchmod + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").lchmod(0o777)) # N: Revealed type is "None" + +- case: upath_lstat + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").lstat()) # N: Revealed type is "upath._stat.UPathStatResult" + +- case: upath_mkdir + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").mkdir()) # N: Revealed type is "None" + +- case: upath_open_default + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").open()) # N: Revealed type is "typing.TextIO" + +- case: upath_open_text + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").open("r")) # N: Revealed type is "typing.TextIO" + +- case: upath_open_binary + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").open("rb")) # N: Revealed type is "typing.BinaryIO" + +- case: upath_is_mount + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").is_mount()) # N: Revealed type is "builtins.bool" + +- case: upath_readlink + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").readlink()) # N: Revealed type is "upath.core.UPath" + +- case: upath_rename + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").rename("efg")) # N: Revealed type is "upath.core.UPath" + +- case: 
upath_replace + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").replace("efg")) # N: Revealed type is "upath.core.UPath" + +- case: upath_resolve + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").resolve()) # N: Revealed type is "upath.core.UPath" + +- case: upath_rmdir + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").rmdir()) # N: Revealed type is "None" + +- case: upath_symlink_to + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").symlink_to("efg")) # N: Revealed type is "None" + +- case: upath_hardlink_to + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").hardlink_to("efg")) # N: Revealed type is "None" + +- case: upath_touch + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").touch()) # N: Revealed type is "None" + +- case: upath_unlink + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").unlink()) # N: Revealed type is "None" + +- case: upath_home + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath.home()) # N: Revealed type is "upath.core.UPath" + +- case: upath_absolute + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").absolute()) # N: Revealed type is "upath.core.UPath" + +- case: upath_expanduser + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").expanduser()) # N: Revealed type is "upath.core.UPath" + +- case: upath_read_bytes + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").read_bytes()) # N: Revealed type is "builtins.bytes" + +- case: upath_read_text + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").read_text()) # N: Revealed type is "builtins.str" + +- case: upath_samefile + disable_cache: false + main: | + from upath 
import UPath + + reveal_type(UPath("abc").samefile("efg")) # N: Revealed type is "builtins.bool" + +- case: upath_write_bytes + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").write_bytes(b"efg")) # N: Revealed type is "builtins.int" + +- case: upath_write_text + disable_cache: false + main: | + from upath import UPath + + reveal_type(UPath("abc").write_text("efg")) # N: Revealed type is "builtins.int" + +- case: upath_link_to_py38 + disable_cache: false + mypy_config: python_version = 3.8 + main: | + from upath import UPath + + UPath("abc").link_to + +- case: upath_link_to_py312plus + disable_cache: false + mypy_config: python_version = 3.12 + main: | + from upath import UPath + + UPath("abc").link_to # E: "UPath" has no attribute "link_to" [attr-defined] + +- case: upath_walk_py38 + disable_cache: false + mypy_config: python_version = 3.8 + main: | + from upath import UPath + + UPath("abc").walk # E: "UPath" has no attribute "walk" [attr-defined] + +- case: upath_walk_py312plus + disable_cache: false + mypy_config: python_version = 3.12 + main: | + from upath import UPath + + reveal_type(UPath("abc").walk()) # N: Revealed type is "typing.Iterator[tuple[upath.core.UPath, builtins.list[builtins.str], builtins.list[builtins.str]]]" diff --git a/upath/_compat.py b/upath/_compat.py index cb6b45b..334888f 100644 --- a/upath/_compat.py +++ b/upath/_compat.py @@ -304,21 +304,6 @@ def parts(self): else: return tuple(self._tail) - def joinpath(self, *pathsegments): - return self.with_segments(self, *pathsegments) - - def __truediv__(self, key): - try: - return self.joinpath(key) - except TypeError: - return NotImplemented - - def __rtruediv__(self, key): - try: - return self.with_segments(key, self) - except TypeError: - return NotImplemented - @property def parent(self): drv = self.drive @@ -490,7 +475,8 @@ def mv(self, path, target, recursive=False, maxdepth=None, **kwargs): ) -F = TypeVar("F") +RT = TypeVar("RT") +F = Callable[..., 
RT] def deprecated(*, python_version: tuple[int, ...]) -> Callable[[F], F]: diff --git a/upath/_flavour.py b/upath/_flavour.py index 5489960..a144bb0 100644 --- a/upath/_flavour.py +++ b/upath/_flavour.py @@ -19,7 +19,7 @@ TypeAlias = Any from fsspec.registry import known_implementations -from fsspec.registry import registry as class_registry +from fsspec.registry import registry as _class_registry from fsspec.spec import AbstractFileSystem from upath._compat import deprecated @@ -40,14 +40,14 @@ "upath_get_kwargs_from_url", ] -class_registry: Mapping[str, type[AbstractFileSystem]] +class_registry: Mapping[str, type[AbstractFileSystem]] = _class_registry PathOrStr: TypeAlias = Union[str, "os.PathLike[str]"] class AnyProtocolFileSystemFlavour(FileSystemFlavourBase): - sep: str = "/" - protocol: tuple[str, ...] = () - root_marker: str = "/" + sep = "/" + protocol = () + root_marker = "/" @classmethod def _strip_protocol(cls, path: str) -> str: @@ -168,10 +168,11 @@ def from_protocol( ) -> WrappedFileSystemFlavour: """return the fsspec flavour for the given protocol""" + _c = cls.protocol_config config = { - key: True - for key, protocols in cls.protocol_config.items() - if protocol in protocols + "netloc_is_anchor": protocol in _c["netloc_is_anchor"], + "supports_empty_parts": protocol in _c["supports_empty_parts"], + "meaningful_trailing_slash": protocol in _c["meaningful_trailing_slash"], } # first try to get an already imported fsspec filesystem class @@ -227,16 +228,12 @@ def stringify_path(pth: PathOrStr) -> str: out = pth.__fspath__() elif isinstance(pth, os.PathLike): out = str(pth) - elif hasattr(pth, "path"): + elif hasattr(pth, "path"): # type: ignore[unreachable] out = pth.path else: out = str(pth) return normalize_empty_netloc(out) - def empty_part_join(self, path: str, *paths: str) -> str: - sep = self.sep - return sep.join([str_remove_suffix(path, sep), *paths]) - def strip_protocol(self, pth: PathOrStr) -> str: pth = self.stringify_path(pth) return 
self._spec._strip_protocol(pth) @@ -270,21 +267,21 @@ def isabs(self, path: PathOrStr) -> bool: return path.startswith(self.root_marker) def join(self, path: PathOrStr, *paths: PathOrStr) -> str: - if self.supports_empty_parts: - _join = self.empty_part_join - else: - _join = posixpath.join if self.netloc_is_anchor: drv, p0 = self.splitdrive(path) pN = list(map(self.stringify_path, paths)) if not drv and not p0: path, *pN = pN drv, p0 = self.splitdrive(path) - return drv + _join(p0 or self.sep, *pN) + p0 = p0 or self.sep else: p0 = str(self.strip_protocol(path)) - pN = map(self.stringify_path, paths) - return _join(p0, *pN) + pN = list(map(self.stringify_path, paths)) + drv = "" + if self.supports_empty_parts: + return drv + self.sep.join([str_remove_suffix(p0, self.sep), *pN]) + else: + return drv + posixpath.join(p0, *pN) def split(self, path: PathOrStr): stripped_path = self.strip_protocol(path) @@ -385,20 +382,21 @@ class LazyFlavourDescriptor: """descriptor to lazily get the flavour for a given protocol""" def __init__(self) -> None: - self._owner = None + self._owner: type[UPath] | None = None def __set_name__(self, owner: type[UPath], name: str) -> None: # helper to provide a more informative repr self._owner = owner + self._default_protocol: str | None try: - self._default_protocol = self._owner.protocols[0] + self._default_protocol = self._owner.protocols[0] # type: ignore except (AttributeError, IndexError): self._default_protocol = None def __get__(self, instance: UPath, owner: type[UPath]) -> WrappedFileSystemFlavour: if instance is not None: return WrappedFileSystemFlavour.from_protocol(instance.protocol) - elif self._default_protocol: + elif self._default_protocol: # type: ignore return WrappedFileSystemFlavour.from_protocol(self._default_protocol) else: return default_flavour @@ -465,7 +463,7 @@ def upath_urijoin(base: str, uri: str) -> str: segments = base_parts + us.path.split("/") segments[1:-1] = filter(None, segments[1:-1]) - resolved_path = [] 
+ resolved_path: list[str] = [] for seg in segments: if seg == "..": diff --git a/upath/_flavour_sources.py b/upath/_flavour_sources.py index ab22e01..e17d29d 100644 --- a/upath/_flavour_sources.py +++ b/upath/_flavour_sources.py @@ -33,6 +33,7 @@ import logging import re from typing import Any +from typing import Literal from typing import cast from urllib.parse import parse_qs from urllib.parse import urlsplit @@ -54,7 +55,24 @@ class FileSystemFlavourBase: """base class for the fsspec flavours""" + protocol: str | tuple[str, ...] + root_marker: Literal["/", ""] + sep: Literal["/"] + + @classmethod + def _strip_protocol(cls, path): + raise NotImplementedError + + @staticmethod + def _get_kwargs_from_urls(path): + raise NotImplementedError + + @classmethod + def _parent(cls, path): + raise NotImplementedError + def __init_subclass__(cls: Any, **kwargs): + protocols: tuple[str, ...] if isinstance(cls.protocol, str): protocols = (cls.protocol,) else: @@ -68,8 +86,9 @@ def __init_subclass__(cls: Any, **kwargs): class AbstractFileSystemFlavour(FileSystemFlavourBase): __orig_class__ = 'fsspec.spec.AbstractFileSystem' __orig_version__ = '2024.2.0' - protocol = 'abstract' - root_marker = '' + protocol: str | tuple[str, ...] 
= 'abstract' + root_marker: Literal['', '/'] = '' + sep: Literal['/'] = '/' @classmethod def _strip_protocol(cls, path): @@ -164,8 +183,8 @@ def _strip_protocol(cls, path: str): str Returns a path without the protocol """ - if isinstance(path, list): - return [cls._strip_protocol(p) for p in path] + if isinstance(path, list): # type: ignore[unreachable] + return [cls._strip_protocol(p) for p in path] # type: ignore[unreachable] STORE_SUFFIX = ".dfs.core.windows.net" logger.debug(f"_strip_protocol for {path}") @@ -197,7 +216,7 @@ def _get_kwargs_from_urls(urlpath): """Get the account_name from the urlpath and pass to storage_options""" ops = infer_storage_options(urlpath) out = {} - host: str | None = ops.get("host", None) + host = ops.get("host", None) if host: match = re.match( r"(?P.+)\.(dfs|blob)\.core\.windows\.net", host @@ -675,7 +694,7 @@ def _strip_protocol(cls, path): """ if isinstance(path, list): return [cls._strip_protocol(p) for p in path] - path_string: str = stringify_path(path) + path_string = stringify_path(path) if path_string.startswith("oss://"): path_string = path_string[5:] diff --git a/upath/_stat.py b/upath/_stat.py index e72b420..f2cbece 100644 --- a/upath/_stat.py +++ b/upath/_stat.py @@ -45,7 +45,7 @@ def _get_stat_result_extra_fields() -> tuple[str, ...]: sr = os.stat_result(range(os.stat_result.n_fields)) rd = sr.__reduce__() assert isinstance(rd, tuple), "unexpected return os.stat_result.__reduce__" - _, (_, extra) = sr.__reduce__() + _, (_, extra) = rd extra_fields = sorted(extra, key=extra.__getitem__) return tuple(extra_fields) @@ -317,7 +317,7 @@ def __iter__(self) -> Iterator[int]: for field in self._fields: yield int(getattr(self, field)) - def index(self, value: int, start: int = 0, stop: int = None, /) -> int: + def index(self, value: int, start: int = 0, stop: int | None = None, /) -> int: """the sequence interface index method.""" if stop is None: stop = len(self._seq) diff --git a/upath/core.py b/upath/core.py index 
a9058cc..b160eee 100644 --- a/upath/core.py +++ b/upath/core.py @@ -10,13 +10,20 @@ from typing import TYPE_CHECKING from typing import Any from typing import BinaryIO +from typing import Generator from typing import Literal from typing import Mapping +from typing import Sequence from typing import TextIO from typing import TypeVar from typing import overload from urllib.parse import urlsplit +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self + from fsspec.registry import get_filesystem_class from fsspec.spec import AbstractFileSystem @@ -32,6 +39,9 @@ from upath._stat import UPathStatResult from upath.registry import get_upath_class +if TYPE_CHECKING: + from urllib.parse import SplitResult + __all__ = ["UPath"] @@ -95,10 +105,28 @@ class UPath(PathlibPathShim, Path): "__root", "__parts", ) + if TYPE_CHECKING: + # public + anchor: str + drive: str + parent: Self + parents: Sequence[Self] + parts: tuple[str, ...] + root: str + stem: str + suffix: str + suffixes: list[str] + + def with_name(self, name: str) -> Self: ... + def with_stem(self, stem: str) -> Self: ... + def with_suffix(self, suffix: str) -> Self: ... + + # private attributes _protocol: str _storage_options: dict[str, Any] _fs_cached: AbstractFileSystem + _tail: str _protocol_dispatch: bool | None = None _flavour = LazyFlavourDescriptor() @@ -410,30 +438,33 @@ def _kwargs(self): return self.storage_options @property - def _url(self): + def _url(self) -> SplitResult: # TODO: # _url should be deprecated, but for now there is no good way of # accessing query parameters from urlpaths... return urlsplit(self.as_posix()) - def __getattr__(self, item): - if item == "_accessor": - warnings.warn( - "UPath._accessor is deprecated. Please use" - " UPath.fs instead. 
Follow the" - " universal_pathlib==0.2.0 migration guide at" - " https://github.com/fsspec/universal_pathlib for more" - " information.", - DeprecationWarning, - stacklevel=2, - ) - if hasattr(self, "_default_accessor"): - accessor_cls = self._default_accessor + if not TYPE_CHECKING: + # allow mypy to catch missing attributes + + def __getattr__(self, item): + if item == "_accessor": + warnings.warn( + "UPath._accessor is deprecated. Please use" + " UPath.fs instead. Follow the" + " universal_pathlib==0.2.0 migration guide at" + " https://github.com/fsspec/universal_pathlib for more" + " information.", + DeprecationWarning, + stacklevel=2, + ) + if hasattr(self, "_default_accessor"): + accessor_cls = self._default_accessor + else: + accessor_cls = FSSpecAccessorShim + return accessor_cls.from_path(self) else: - accessor_cls = FSSpecAccessorShim - return accessor_cls.from_path(self) - else: - raise AttributeError(item) + raise AttributeError(item) @classmethod def _from_parts(cls, parts, **kwargs): @@ -529,13 +560,28 @@ def __reduce__(self): } return _make_instance, (type(self), args, kwargs) - def with_segments(self, *pathsegments): + def with_segments(self, *pathsegments: str | os.PathLike[str]) -> Self: return type(self)( *pathsegments, protocol=self._protocol, **self._storage_options, ) + def joinpath(self, *pathsegments: str | os.PathLike[str]) -> Self: + return self.with_segments(self, *pathsegments) + + def __truediv__(self, key: str | os.PathLike[str]) -> Self: + try: + return self.joinpath(key) + except TypeError: + return NotImplemented + + def __rtruediv__(self, key: str | os.PathLike[str]) -> Self: + try: + return self.with_segments(key, self) + except TypeError: + return NotImplemented + # === upath.UPath non-standard changes ============================ # NOTE: @@ -642,13 +688,13 @@ def __bytes__(self): warnings.warn(msg, PendingDeprecationWarning, stacklevel=2) return os.fsencode(self) - def as_uri(self): + def as_uri(self) -> str: return str(self) - 
def is_reserved(self): + def is_reserved(self) -> bool: return False - def __eq__(self, other): + def __eq__(self, other: object) -> bool: """UPaths are considered equal if their protocol, path and storage_options are equal.""" if not isinstance(other, UPath): @@ -659,7 +705,7 @@ def __eq__(self, other): and self.storage_options == other.storage_options ) - def __hash__(self): + def __hash__(self) -> int: """The returned hash is based on the protocol and path only. Note: in the future, if hash collisions become an issue, we @@ -667,7 +713,13 @@ def __hash__(self): """ return hash((self.protocol, self.path)) - def relative_to(self, other, /, *_deprecated, walk_up=False): + def relative_to( # type: ignore[override] + self, + other, + /, + *_deprecated, + walk_up=False, + ) -> Self: if isinstance(other, UPath) and self.storage_options != other.storage_options: raise ValueError( "paths have different storage_options:" @@ -675,13 +727,13 @@ def relative_to(self, other, /, *_deprecated, walk_up=False): ) return super().relative_to(other, *_deprecated, walk_up=walk_up) - def is_relative_to(self, other, /, *_deprecated): + def is_relative_to(self, other, /, *_deprecated) -> bool: # type: ignore[override] if isinstance(other, UPath) and self.storage_options != other.storage_options: return False return super().is_relative_to(other, *_deprecated) @property - def name(self): + def name(self) -> str: tail = self._tail if not tail: return "" @@ -693,7 +745,11 @@ def name(self): # === pathlib.Path ================================================ - def stat(self, *, follow_symlinks=True) -> UPathStatResult: + def stat( # type: ignore[override] + self, + *, + follow_symlinks=True, + ) -> UPathStatResult: if not follow_symlinks: warnings.warn( "UPath.stat(follow_symlinks=False): follow_symlinks=False is" @@ -703,23 +759,23 @@ def stat(self, *, follow_symlinks=True) -> UPathStatResult: ) return UPathStatResult.from_info(self.fs.stat(self.path)) - def lstat(self): + def lstat(self) 
-> UPathStatResult: # type: ignore[override] # return self.stat(follow_symlinks=False) raise NotImplementedError - def exists(self, *, follow_symlinks=True): + def exists(self, *, follow_symlinks=True) -> bool: return self.fs.exists(self.path) - def is_dir(self): + def is_dir(self) -> bool: return self.fs.isdir(self.path) - def is_file(self): + def is_file(self) -> bool: return self.fs.isfile(self.path) - def is_mount(self): + def is_mount(self) -> bool: return False - def is_symlink(self): + def is_symlink(self) -> bool: try: info = self.fs.info(self.path) if "islink" in info: @@ -728,28 +784,28 @@ def is_symlink(self): return False return False - def is_junction(self): + def is_junction(self) -> bool: return False - def is_block_device(self): + def is_block_device(self) -> bool: return False - def is_char_device(self): + def is_char_device(self) -> bool: return False - def is_fifo(self): + def is_fifo(self) -> bool: return False - def is_socket(self): + def is_socket(self) -> bool: return False - def samefile(self, other_path): + def samefile(self, other_path) -> bool: raise NotImplementedError - @overload + @overload # type: ignore[override] def open( self, - mode: Literal["r", "w", "a"] = ..., + mode: Literal["r", "w", "a"] = "r", buffering: int = ..., encoding: str = ..., errors: str = ..., @@ -758,9 +814,9 @@ def open( ) -> TextIO: ... 
@overload - def open( + def open( # type: ignore[override] self, - mode: Literal["rb", "wb", "ab"] = ..., + mode: Literal["rb", "wb", "ab"], buffering: int = ..., encoding: str = ..., errors: str = ..., @@ -805,7 +861,7 @@ def open( fsspec_kwargs.setdefault("block_size", fsspec_kwargs.pop("buffering")) return self.fs.open(self.path, mode=mode, **fsspec_kwargs) - def iterdir(self): + def iterdir(self) -> Generator[UPath, None, None]: for name in self.fs.listdir(self.path): # fsspec returns dictionaries if isinstance(name, dict): @@ -825,7 +881,9 @@ def _make_child_relpath(self, name): del path._str # fix _str = str(self) assignment return path - def glob(self, pattern: str, *, case_sensitive=None): + def glob( + self, pattern: str, *, case_sensitive=None + ) -> Generator[UPath, None, None]: path_pattern = self.joinpath(pattern).path sep = self._flavour.sep base = self.fs._strip_protocol(self.path) @@ -833,7 +891,9 @@ def glob(self, pattern: str, *, case_sensitive=None): name = str_remove_prefix(str_remove_prefix(name, base), sep) yield self.joinpath(name) - def rglob(self, pattern: str, *, case_sensitive=None): + def rglob( + self, pattern: str, *, case_sensitive=None + ) -> Generator[UPath, None, None]: if _FSSPEC_HAS_WORKING_GLOB is None: _check_fsspec_has_working_glob() @@ -861,23 +921,23 @@ def rglob(self, pattern: str, *, case_sensitive=None): yield self.joinpath(name) @classmethod - def cwd(cls): + def cwd(cls) -> UPath: if cls is UPath: - return get_upath_class("").cwd() + return get_upath_class("").cwd() # type: ignore[union-attr] else: raise NotImplementedError @classmethod - def home(cls): + def home(cls) -> UPath: if cls is UPath: - return get_upath_class("").home() + return get_upath_class("").home() # type: ignore[union-attr] else: raise NotImplementedError - def absolute(self): + def absolute(self) -> Self: return self - def resolve(self, strict: bool = False): + def resolve(self, strict: bool = False) -> Self: _parts = self.parts # Do not attempt to 
normalize path if no parts are dots @@ -895,19 +955,19 @@ def resolve(self, strict: bool = False): return self.with_segments(*_parts[:1], *resolved) - def owner(self): + def owner(self) -> str: raise NotImplementedError - def group(self): + def group(self) -> str: raise NotImplementedError - def readlink(self): + def readlink(self) -> Self: raise NotImplementedError - def touch(self, mode=0o666, exist_ok=True): + def touch(self, mode=0o666, exist_ok=True) -> None: self.fs.touch(self.path, truncate=not exist_ok) - def mkdir(self, mode=0o777, parents=False, exist_ok=False): + def mkdir(self, mode=0o777, parents=False, exist_ok=False) -> None: if parents and not exist_ok and self.exists(): raise FileExistsError(str(self)) try: @@ -922,45 +982,63 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False): if not self.is_dir(): raise FileExistsError(str(self)) - def chmod(self, mode, *, follow_symlinks=True): + def chmod(self, mode: int, *, follow_symlinks: bool = True) -> None: raise NotImplementedError - def unlink(self, missing_ok=False): + def lchmod(self, mode: int) -> None: + raise NotImplementedError + + def unlink(self, missing_ok: bool = False) -> None: if not self.exists(): if not missing_ok: raise FileNotFoundError(str(self)) return self.fs.rm(self.path, recursive=False) - def rmdir(self, recursive: bool = True): # fixme: non-standard + def rmdir(self, recursive: bool = True) -> None: # fixme: non-standard if not self.is_dir(): raise NotADirectoryError(str(self)) - if not recursive and next(self.iterdir()): + if not recursive and next(self.iterdir()): # type: ignore[arg-type] raise OSError(f"Not recursive and directory not empty: {self}") self.fs.rm(self.path, recursive=recursive) def rename( - self, target, *, recursive=False, maxdepth=None, **kwargs - ): # fixme: non-standard + self, + target: str | os.PathLike[str] | UPath, + *, + recursive: bool = False, + maxdepth: int | None = None, + **kwargs: Any, + ) -> UPath: # fixme: non-standard + target_: 
UPath if not isinstance(target, UPath): - target = self.parent.joinpath(target).resolve() + target_ = self.parent.joinpath(target).resolve() + else: + target_ = target self.fs.mv( self.path, - target.path, + target_.path, recursive=recursive, maxdepth=maxdepth, **kwargs, ) - return target + return target_ - def replace(self, target): + def replace(self, target: str | os.PathLike[str] | UPath) -> UPath: raise NotImplementedError # todo - def symlink_to(self, target, target_is_directory=False): + def symlink_to( # type: ignore[override] + self, + target: str | os.PathLike[str] | UPath, + target_is_directory: bool = False, + ) -> None: raise NotImplementedError - def hardlink_to(self, target): + def hardlink_to( # type: ignore[override] + self, + target: str | os.PathLike[str] | UPath, + ) -> None: raise NotImplementedError - def expanduser(self): + def expanduser(self) -> Self: raise NotImplementedError diff --git a/upath/implementations/http.py b/upath/implementations/http.py index c759fb9..dbe18de 100644 --- a/upath/implementations/http.py +++ b/upath/implementations/http.py @@ -28,11 +28,11 @@ def _transform_init_args( ) -> tuple[tuple[str | os.PathLike, ...], str, dict[str, Any]]: # allow initialization via a path argument and protocol keyword if args and not str(args[0]).startswith(protocol): - args = (f"{protocol}://{args[0].lstrip('/')}", *args[1:]) + args = (f"{protocol}://{str(args[0]).lstrip('/')}", *args[1:]) return args, protocol, storage_options @property - def root(self) -> str: + def root(self) -> str: # type: ignore[override] return super().root or "/" def __str__(self): diff --git a/upath/implementations/local.py b/upath/implementations/local.py index b2ee1e5..4552585 100644 --- a/upath/implementations/local.py +++ b/upath/implementations/local.py @@ -101,17 +101,17 @@ def _upath_init(inst: PosixUPath | WindowsUPath) -> None: """helper to initialize the PosixPath/WindowsPath instance with UPath attrs""" inst._protocol = "" inst._storage_options = {} 
- if sys.version_info < (3, 10): + if sys.version_info < (3, 10) and hasattr(inst, "_init"): inst._init() -class PosixUPath(PosixPath, LocalPath): +class PosixUPath(PosixPath, LocalPath): # type: ignore[misc] __slots__ = () # assign all PosixPath methods/attrs to prevent multi inheritance issues _set_class_attributes(locals(), src=PosixPath) - def open( + def open( # type: ignore[override] self, mode="r", buffering=-1, @@ -136,14 +136,14 @@ def open( def __new__( cls, *args, protocol: str | None = None, **storage_options: Any - ) -> UPath: + ) -> PosixUPath: if os.name == "nt": raise NotImplementedError( f"cannot instantiate {cls.__name__} on your system" ) obj = super().__new__(cls, *args) obj._protocol = "" - return obj + return obj # type: ignore[return-value] def __init__( self, *args, protocol: str | None = None, **storage_options: Any @@ -169,13 +169,13 @@ def path(self) -> str: return PosixPath.__str__(self) -class WindowsUPath(WindowsPath, LocalPath): +class WindowsUPath(WindowsPath, LocalPath): # type: ignore[misc] __slots__ = () # assign all WindowsPath methods/attrs to prevent multi inheritance issues _set_class_attributes(locals(), src=WindowsPath) - def open( + def open( # type: ignore[override] self, mode="r", buffering=-1, @@ -200,14 +200,14 @@ def open( def __new__( cls, *args, protocol: str | None = None, **storage_options: Any - ) -> UPath: + ) -> WindowsUPath: if os.name != "nt": raise NotImplementedError( f"cannot instantiate {cls.__name__} on your system" ) obj = super().__new__(cls, *args) obj._protocol = "" - return obj + return obj # type: ignore[return-value] def __init__( self, *args, protocol: str | None = None, **storage_options: Any