Skip to content

Commit

Permalink
Merge pull request #95 from justindujardin/fix/windows-paths
Browse files Browse the repository at this point in the history
Fix Absolute Paths (Windows/Unix)
  • Loading branch information
justindujardin authored Nov 22, 2022
2 parents 490ecb8 + 674a109 commit ba5b5b3
Show file tree
Hide file tree
Showing 18 changed files with 278 additions and 44 deletions.
23 changes: 23 additions & 0 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,29 @@ jobs:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
run: sh tools/codecov.sh

build-windows:
runs-on: 'windows-latest'
steps:
- uses: actions/checkout@v2
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: 3.11
- name: Install dependencies
run: .\tools\setup.bat
- name: Build Wheel
run: .\tools\build.bat
- name: Remove Pathy folder
uses: JesseTG/rm@v1.0.3
with:
path: .\pathy
- name: Test Wheel (local only)
run: .\tools\test_package.bat
- name: Report Code Coverage
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
run: .\tools\codecov.bat

deploy:
runs-on: ubuntu-latest
needs: "build"
Expand Down
77 changes: 53 additions & 24 deletions pathy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@

_windows_flavour: Any = _WindowsFlavour() # type:ignore
_posix_flavour: Any = _PosixFlavour() # type:ignore
_drive_letters = [(f"{chr(ord('a') + i)}:") for i in range(26)] # lower case with :


@dataclass
Expand Down Expand Up @@ -91,7 +92,7 @@ def __init__(
name: str,
is_dir: bool = False,
size: int = -1,
last_modified: int = -1,
last_modified: Optional[int] = None,
raw: Optional[Blob] = None,
):
self.name = name
Expand Down Expand Up @@ -224,7 +225,13 @@ def owner(self, path: "Pathy") -> Optional[str]:

def resolve(self, path: "Pathy", strict: bool = False) -> "Pathy":
path_parts = str(path).replace(path.drive, "")
return Pathy(f"{path.drive}{os.path.abspath(path_parts)}")
resolved = f"{path.drive}{os.path.abspath(path_parts)}"
# On Windows the abspath normalization that happens replaces
# / with \\ so we have to revert it here to preserve the
# expected resolved path.
if path.drive.lower() not in _drive_letters:
resolved = resolved.replace("\\", "/")
return Pathy(resolved)

def mkdir(self, path: "Pathy", mode: int = 0) -> None:
bucket: Optional[Bucket] = self.lookup_bucket(path)
Expand Down Expand Up @@ -274,8 +281,6 @@ def scheme(self) -> str:
```python
assert Pathy("gs://foo/bar").scheme == "gs"
assert Pathy("file:///tmp/foo/bar").scheme == "file"
assert Pathy("/dev/null").scheme == ""
"""
# If there is no drive, return nothing
if self.drive == "":
Expand Down Expand Up @@ -316,6 +321,9 @@ def _absolute_path_validation(self) -> None:
def _format_parsed_parts(cls, drv: str, root: str, parts: List[str]) -> str:
# Bucket path "gs://foo/bar"
join_fn: Callable[[List[str]], str] = cls._flavour.join # type:ignore
# If the scheme is file: and it's Windows, use \\ slashes to join
if drv.lower() == "file:" and os.name == "nt":
join_fn = "\\".join
res: str
if drv and root:
res = f"{drv}//{root}/" + join_fn(parts[2:])
Expand Down Expand Up @@ -348,6 +356,15 @@ def ls(self: Any) -> Generator["BlobStat", None, None]:
name=str(blob.name), size=stat.size, last_modified=stat.last_modified
)

def iterdir(self: Any) -> Generator["BasePath", None, None]:
"""Iterate over the blobs found in the given bucket or blob prefix path."""
client: BucketClient = get_client(getattr(self, "scheme", "file"))
blobs: "ScanDirFS" = cast(
ScanDirFS, client.scandir(self, prefix=getattr(self, "prefix", None))
) # type:ignore
for blob in blobs:
yield self / blob.name


class BucketsAccessor:
"""Path access for python < 3.11"""
Expand All @@ -357,14 +374,26 @@ def scandir(self, target: Any) -> "PathyScanDir":
return client.scandir(target, prefix=getattr(target, "prefix", None))


class Pathy(Path, PurePathy):
class Pathy(PurePathy, BasePath):
"""Subclass of `pathlib.Path` that works with bucket APIs."""

__slots__ = ()
_accessor: BucketsAccessor = BucketsAccessor()
_NOT_SUPPORTED_MESSAGE = "{method} is an unsupported bucket operation"
_UNSUPPORTED_PATH = (
"absolute file paths must be initialized using Pathy.fluid(path)"
)
_client: Optional[BucketClient]

def __init__(self, *args: Any, **kwargs: Any) -> None:
super().__init__(**kwargs)
# Error when initializing paths without using Pathy.fluid if the
# path is an absolute system path (windows/unix)
root = str(self)[0] # "/tmp/path"
drv = str(self)[:2].lower() # C:\\tmp\\path
if root == "/" or drv in _drive_letters:
raise ValueError(Pathy._UNSUPPORTED_PATH)

def client(self, path: "Pathy") -> BucketClient:
return get_client(path.scheme)

Expand Down Expand Up @@ -399,10 +428,13 @@ def fluid(cls, path_candidate: Union[str, FluidPath]) -> FluidPath:
assert fluid_path.prefix == "foo.txt/"
```
"""
from_path: FluidPath = Pathy(path_candidate)
if from_path.root in ["/", ""]:
from_path = BasePath(path_candidate)
return from_path
try:
result = Pathy(path_candidate)
if result.root != "":
return result
except ValueError:
pass
return BasePath(path_candidate)

@classmethod
def from_bucket(cls, bucket_name: str, scheme: str = "gs") -> "Pathy":
Expand Down Expand Up @@ -481,11 +513,7 @@ def ls(self: "Pathy") -> Generator["BlobStat", None, None]:
Yields BlobStat objects for each found blob.
"""
blobs: "PathyScanDir" = self.client(self).scandir(
self, prefix=self.prefix
) # type:ignore
for blob in blobs:
yield blob._stat
yield from super().ls()

def stat( # type: ignore[override]
self: "Pathy", *, follow_symlinks: bool = True
Expand Down Expand Up @@ -546,19 +574,17 @@ def is_file(self: "Pathy") -> bool:

def iterdir(self: "Pathy") -> Generator["Pathy", None, None]:
"""Iterate over the blobs found in the given bucket or blob prefix path."""
self._absolute_path_validation()
for blob in self.ls():
yield self / blob.name
yield from cast(Generator["Pathy", None, None], super().iterdir())

def glob(self: "Pathy", pattern: str) -> Generator["Pathy", None, None]:
"""Perform a glob match relative to this Pathy instance, yielding all matched
blobs."""
yield from super().glob(pattern) # type:ignore
yield from cast(Generator["Pathy", None, None], super().glob(pattern))

def rglob(self: "Pathy", pattern: str) -> Generator["Pathy", None, None]:
"""Perform a recursive glob match relative to this Pathy instance, yielding
all matched blobs. Imagine adding "**/" before a call to glob."""
yield from super().rglob(pattern) # type:ignore
yield from cast(Generator["Pathy", None, None], super().rglob(pattern))

def open( # type:ignore
self: "Pathy",
Expand Down Expand Up @@ -626,8 +652,6 @@ def rename(self: "Pathy", target: Union[str, PurePath]) -> "Pathy": # type:igno
self_type = type(self)
result = target if isinstance(target, self_type) else self_type(target)
result._absolute_path_validation() # type:ignore
# super().rename(result)
# return result

client: BucketClient = self.client(self)
bucket: Bucket = client.get_bucket(self)
Expand Down Expand Up @@ -883,8 +907,9 @@ def get_blob(self, blob_name: str) -> Optional[BlobFS]: # type:ignore[override]
# https://docs.python.org/3/library/pathlib.html#pathlib.Path.owner
owner: Optional[str]
try:
owner = native_blob.owner()
except KeyError:
# path.owner() raises NotImplementedError on windows
owner = native_blob.owner() # type:ignore
except (KeyError, NotImplementedError):
owner = None
return BlobFS(
bucket=self,
Expand Down Expand Up @@ -941,6 +966,10 @@ def rmdir(self, path: Pathy) -> None:
full_path = self.full_path(path)
return shutil.rmtree(str(full_path))

def mkdir(self, path: "Pathy", mode: int = 0) -> None:
full_path = self.full_path(path)
os.makedirs(full_path, exist_ok=True)

def open(
self,
path: Pathy,
Expand Down Expand Up @@ -1065,7 +1094,7 @@ class ScanDirFS(PathyScanDir):

def scandir(self) -> Generator[BucketEntry, None, None]:
scan_path = self._client.root / self._path.root
if isinstance(self._path, BasePath):
if isinstance(self._path, BasePath) and not isinstance(self._path, Pathy):
scan_path = (
self._client.root / self._path.root
if not self._path.is_absolute()
Expand Down
26 changes: 13 additions & 13 deletions pathy/_tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,65 +39,65 @@ def test_base_home() -> None:


def test_base_expanduser() -> None:
path = Pathy("/fake-bucket/fake-key")
path = Pathy("gs://fake-bucket/fake-key")
with pytest.raises(NotImplementedError):
path.expanduser()


def test_base_chmod() -> None:
path = Pathy("/fake-bucket/fake-key")
path = Pathy("gs://fake-bucket/fake-key")
with pytest.raises(NotImplementedError):
path.chmod(0o666)


def test_base_lchmod() -> None:
path = Pathy("/fake-bucket/fake-key")
path = Pathy("gs://fake-bucket/fake-key")
with pytest.raises(NotImplementedError):
path.lchmod(0o666)


def test_base_group() -> None:
path = Pathy("/fake-bucket/fake-key")
path = Pathy("gs://fake-bucket/fake-key")
with pytest.raises(NotImplementedError):
path.group()


def test_base_is_mount() -> None:
assert not Pathy("/fake-bucket/fake-key").is_mount()
assert not Pathy("gs://fake-bucket/fake-key").is_mount()


def test_base_is_symlink() -> None:
assert not Pathy("/fake-bucket/fake-key").is_symlink()
assert not Pathy("gs://fake-bucket/fake-key").is_symlink()


def test_base_is_socket() -> None:
assert not Pathy("/fake-bucket/fake-key").is_socket()
assert not Pathy("gs://fake-bucket/fake-key").is_socket()


def test_base_is_fifo() -> None:
assert not Pathy("/fake-bucket/fake-key").is_fifo()
assert not Pathy("gs://fake-bucket/fake-key").is_fifo()


def test_base_is_block_device() -> None:
path = Pathy("/fake-bucket/fake-key")
path = Pathy("gs://fake-bucket/fake-key")
with pytest.raises(NotImplementedError):
path.is_block_device()


def test_base_is_char_device() -> None:
path = Pathy("/fake-bucket/fake-key")
path = Pathy("gs://fake-bucket/fake-key")
with pytest.raises(NotImplementedError):
path.is_char_device()


def test_base_lstat() -> None:
path = Pathy("/fake-bucket/fake-key")
path = Pathy("gs://fake-bucket/fake-key")
with pytest.raises(NotImplementedError):
path.lstat()


def test_base_symlink_to() -> None:
path = Pathy("/fake-bucket/fake-key")
path = Pathy("gs://fake-bucket/fake-key")
with pytest.raises(NotImplementedError):
path.symlink_to("file_name")

Expand Down Expand Up @@ -212,5 +212,5 @@ def test_bucket_entry_defaults() -> None:
assert "size" in repr(entry)
stat = entry.stat()
assert isinstance(stat, BlobStat)
assert stat.last_modified == -1
assert stat.last_modified is None
assert stat.size == -1
32 changes: 32 additions & 0 deletions pathy/_tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pathlib
import shutil
import tempfile

import pytest
Expand Down Expand Up @@ -262,3 +263,34 @@ def test_cli_ls_local_files(with_adapter: str, bucket: str) -> None:
assert one in result.output
assert two in result.output
assert str(root / "folder") in result.output


def test_cli_ls_diff_years_modified() -> None:
import os

root = Pathy.fluid(tempfile.mkdtemp()) / ENV_ID / "ls_diff_year"
root.mkdir(parents=True, exist_ok=True)

# Create one file right now
new_path = root / "new_file.txt"
new_path.write_text("new")

# Create another and set its modified time to one year before now
old_path = root / "old_file.txt"
old_path.write_text("old")
old_stat = old_path.stat()
assert isinstance(old_stat, os.stat_result), "expect local file"
one_year = 31556926 # seconds
os.utime(
str(old_path), (old_stat.st_atime - one_year, old_stat.st_mtime - one_year)
)
new_old_stat = old_path.stat()
assert isinstance(new_old_stat, os.stat_result), "expect local file"
assert int(old_stat.st_mtime) == int(new_old_stat.st_mtime + one_year)

result = runner.invoke(app, ["ls", "-l", str(root)])
assert result.exit_code == 0
assert str(old_path) in result.output
assert str(new_path) in result.output

shutil.rmtree(str(root))
5 changes: 3 additions & 2 deletions pathy/_tests/test_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def test_file_bucket_client_fs_open(with_adapter: str) -> None:
blob = Pathy("gs://foo/bar")
with pytest.raises(ClientError):
client.open(blob)
blob.mkdir()
blob.parent.mkdir(parents=True, exist_ok=True)
blob.touch()
assert client.open(blob) is not None

Expand All @@ -42,7 +42,8 @@ def test_file_bucket_client_fs_make_uri(with_adapter: str) -> None:
client: BucketClientFS = get_client("gs")
blob = Pathy("gs://foo/bar")
actual = client.make_uri(blob)
expected = f"file://{client.root}/foo/bar"
sep = client.root._flavour.sep # type:ignore
expected = f"file://{client.root}{sep}foo{sep}bar"
assert actual == expected

# Invalid root
Expand Down
10 changes: 10 additions & 0 deletions pathy/_tests/test_pathy.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@ def test_pathy_is_path_instance(with_adapter: str) -> None:
assert isinstance(blob, Path)


def test_pathy_error_invalid_scheme_paths() -> None:
# Initializing a path with an absolute system path warns
with pytest.raises(ValueError):
Pathy("c:\\temp\\other\\file.txt")
with pytest.raises(ValueError):
Pathy("/tmp/other/file.txt")


@pytest.mark.parametrize("adapter", TEST_ADAPTERS)
def test_pathy_fluid(with_adapter: str, bucket: str) -> None:
path: FluidPath = Pathy.fluid(f"{with_adapter}://{bucket}/{ENV_ID}/fake-key")
Expand All @@ -37,6 +45,8 @@ def test_pathy_fluid(with_adapter: str, bucket: str) -> None:
assert isinstance(path, BasePath)
path = Pathy.fluid("/dev/null")
assert isinstance(path, BasePath)
path = Pathy.fluid("C:\\Windows\\Path")
assert isinstance(path, BasePath)


@pytest.mark.parametrize("adapter", TEST_ADAPTERS)
Expand Down
1 change: 1 addition & 0 deletions pathy/_tests/test_pure_pathy.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def test_pure_pathy_scheme_extraction() -> None:
assert PurePathy("s3://var/tests/fake").scheme == "s3"
assert PurePathy("file://var/tests/fake").scheme == "file"
assert PurePathy("/var/tests/fake").scheme == ""
assert PurePathy("C:\\pathy\\subfolder").scheme == ""


@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6 or higher")
Expand Down
Loading

0 comments on commit ba5b5b3

Please sign in to comment.