Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/python3.9 #46

Merged
merged 6 commits into from
Feb 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ not supported by the bucket API provider.
## rename <kbd>method</kbd>

```python (doc)
Pathy.rename(self: 'Pathy', target: Union[str, pathlib.PurePath]) -> None
Pathy.rename(self: 'Pathy', target: Union[str, pathlib.PurePath]) -> 'Pathy'
```

Rename this path to the given target.
Expand All @@ -235,7 +235,7 @@ to match the target prefix.
## replace <kbd>method</kbd>

```python (doc)
Pathy.replace(self: 'Pathy', target: Union[str, pathlib.PurePath]) -> None
Pathy.replace(self: 'Pathy', target: Union[str, pathlib.PurePath]) -> 'Pathy'
```

Renames this path to the given target.
Expand Down
3 changes: 3 additions & 0 deletions codecov.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
coverage:
status:
patch: off
66 changes: 54 additions & 12 deletions pathy/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@
from typing import (
IO,
Any,
ContextManager,
Dict,
Generator,
Generic,
Iterator,
List,
Optional,
Tuple,
Expand Down Expand Up @@ -194,7 +196,7 @@ def scandir(
path: "Pathy" = None,
prefix: Optional[str] = None,
delimiter: Optional[str] = None,
) -> Generator[BucketEntry[BucketType, BucketBlobType], None, None]:
) -> "PathyScanDir":
raise NotImplementedError(SUBCLASS_ERROR)

def create_bucket(self, path: "Pathy") -> Bucket:
Expand Down Expand Up @@ -364,12 +366,13 @@ def exists(self, path: "Pathy") -> bool:
# Determine if the path exists according to the current adapter
return client.exists(path)

def scandir(self, path: "Pathy") -> Generator[BucketEntry, None, None]:
def scandir(self, path: "Pathy") -> "PathyScanDir":
return self.client(path).scandir(path, prefix=path.prefix)

def listdir(self, path: "Pathy") -> Generator[str, None, None]:
for entry in self.scandir(path):
yield entry.name
with self.scandir(path) as entries:
for entry in entries:
yield entry.name

def open(
self,
Expand Down Expand Up @@ -646,7 +649,7 @@ def open( # type:ignore
raise ValueError("binary mode doesn't take an encoding argument")

# Leftover pathlib internals stuff
if self._closed: # type:ignore
if hasattr(self, "_closed") and self._closed: # type:ignore
self._raise_closed() # type:ignore
return self._accessor.open(
self,
Expand Down Expand Up @@ -679,7 +682,7 @@ def resolve(self, strict: bool = False) -> "Pathy":
self._absolute_path_validation()
return self._accessor.resolve(self, strict=strict)

def rename(self: "Pathy", target: Union[str, PurePath]) -> None:
def rename(self: "Pathy", target: Union[str, PurePath]) -> "Pathy": # type:ignore
"""Rename this path to the given target.

If the target exists and is a file, it will be replaced silently if the user
Expand All @@ -689,16 +692,16 @@ def rename(self: "Pathy", target: Union[str, PurePath]) -> None:
to match the target prefix."""
self._absolute_path_validation()
self_type = type(self)
if not isinstance(target, self_type):
target = self_type(target)
target._absolute_path_validation() # type:ignore
super().rename(target)
result = target if isinstance(target, self_type) else self_type(target)
result._absolute_path_validation() # type:ignore
super().rename(result)
return result

def replace(self: "Pathy", target: Union[str, PurePath]) -> None:
def replace(self: "Pathy", target: Union[str, PurePath]) -> "Pathy": # type:ignore
"""Renames this path to the given target.

If target points to an existing path, it will be replaced."""
self.rename(target)
return self.rename(target)

def rmdir(self: "Pathy") -> None:
"""Removes this bucket or blob prefix. It must be empty."""
Expand Down Expand Up @@ -821,3 +824,42 @@ def symlink_to(
method=self.symlink_to.__qualname__
)
raise NotImplementedError(message)


class PathyScanDir(Iterator[Any], ContextManager[Any]):
    """A scandir implementation that works for all python 3.x versions.

    Older Python versions require that scandir be iterable so it can be
    converted to a list of results.

    Python >= 3.8 requires that scandir work as a context manager.

    Subclasses implement `scandir()` as a generator of bucket entries. The
    generator is created once in `__init__` and shared by every access path
    (`with`, `for`, `next()`), so entries consumed through one of them are
    not produced again by another.
    """

    def __init__(
        self,
        client: "BucketClient",
        path: "Optional[PurePathy]" = None,
        prefix: Optional[str] = None,
        delimiter: Optional[str] = None,
    ) -> None:
        """Store the scan parameters and create the entry generator.

        client: bucket client used by the subclass to list entries.
        path: path being scanned, or None.
        prefix: optional blob prefix to restrict the scan to.
        delimiter: optional delimiter, stored for subclasses to use.
        """
        super().__init__()
        self._client = client
        self._path = path
        self._prefix = prefix
        self._delimiter = delimiter
        # The attributes above must be set first: scandir() reads them.
        self._generator = self.scandir()

    def __enter__(self) -> "Generator[BucketEntry, None, None]":
        """Return the underlying entry generator for use in a `with` block."""
        return self._generator

    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
        """Leave the `with` block; returning None lets exceptions propagate."""
        pass

    def __next__(self) -> "BucketEntry":
        """Return the next entry, raising StopIteration when exhausted.

        This must return an entry rather than be a generator function itself,
        otherwise `next(scan_dir)` would hand back a generator object.
        """
        return next(self._generator)

    def __iter__(self) -> "PathyScanDir":
        """Return self; iteration is driven by `__next__`."""
        return self

    def scandir(self) -> "Generator[BucketEntry, None, None]":
        """Yield the entries for this scan. Must be overridden.

        Raises NotImplementedError when called on the base class.
        """
        raise NotImplementedError("must be implemented in a subclass")
78 changes: 43 additions & 35 deletions pathy/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
BucketEntry,
ClientError,
Pathy,
PathyScanDir,
PurePathy,
StreamableType,
)
Expand Down Expand Up @@ -182,44 +183,13 @@ def list_buckets(self, **kwargs: Dict[str, Any]) -> Generator[BucketFS, None, No
if f.is_dir():
yield BucketFS(f.name, f)

def scandir( # type:ignore[override]
def scandir(
self,
path: Pathy = None,
prefix: Optional[str] = None,
delimiter: Optional[str] = None,
) -> Generator[BucketEntry[BucketFS, pathlib.Path], None, None]:
if path is None or not path.root:
for bucket in self.list_buckets():
yield BucketEntryFS(bucket.name, is_dir=True, raw=None)
return
assert path is not None
assert path.root is not None
scan_path = self.root / path.root
if prefix is not None:
scan_path = scan_path / prefix
for dir_entry in scan_path.glob("*"):
if dir_entry.is_dir():
yield BucketEntryFS(dir_entry.name, is_dir=True, raw=None)
else:
file_path = pathlib.Path(dir_entry)
stat = file_path.stat()
file_size = stat.st_size
updated = int(round(stat.st_mtime_ns * 1000))
blob: Blob = BlobFS(
self.get_bucket(path),
name=dir_entry.name,
size=file_size,
updated=updated,
owner=None,
raw=file_path,
)
yield BucketEntryFS(
name=dir_entry.name,
is_dir=False,
size=file_size,
last_modified=updated,
raw=blob,
)
) -> PathyScanDir:
return _FSScanDir(client=self, path=path, prefix=prefix, delimiter=delimiter)

def list_blobs(
self,
Expand All @@ -233,7 +203,7 @@ def list_blobs(
scan_path = self.root / path.root
if prefix is not None:
scan_path = scan_path / prefix
elif prefix is not None:
elif prefix is not None and path.key is not None:
scan_path = scan_path / path.key

# Path to a file
Expand Down Expand Up @@ -265,3 +235,41 @@ def list_blobs(
owner=None,
raw=file_path,
)


class _FSScanDir(PathyScanDir):
    """Scan a local-filesystem bucket client and yield one entry per child.

    With no path (or a path without a root) every bucket known to the client
    is yielded as a directory entry. Otherwise the children matched by
    `glob("*")` under `<client root>/<path root>[/<prefix>]` are yielded.
    """

    _client: BucketClientFS

    def scandir(self) -> Generator[BucketEntry[BucketFS, pathlib.Path], None, None]:
        """Yield a BucketEntryFS for each bucket or each child of the path."""
        target = self._path
        if target is None or not target.root:
            # No bucket named: enumerate the buckets themselves.
            for fs_bucket in self._client.list_buckets():
                yield BucketEntryFS(fs_bucket.name, is_dir=True, raw=None)
            return
        assert target is not None
        assert target.root is not None
        base = self._client.root / target.root
        folder = base if self._prefix is None else base / self._prefix
        for child in folder.glob("*"):
            if child.is_dir():
                yield BucketEntryFS(child.name, is_dir=True, raw=None)
                continue
            local_file = pathlib.Path(child)
            info = local_file.stat()
            size = info.st_size
            # NOTE(review): st_mtime_ns is in nanoseconds and is scaled by
            # 1000 here; confirm the unit consumers of `updated` expect.
            modified = int(round(info.st_mtime_ns * 1000))
            blob: Blob = BlobFS(
                self._client.get_bucket(target),
                name=child.name,
                size=size,
                updated=modified,
                owner=None,
                raw=local_file,
            )
            yield BucketEntryFS(
                name=child.name,
                is_dir=False,
                size=size,
                last_modified=modified,
                raw=blob,
            )
103 changes: 59 additions & 44 deletions pathy/gcs.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
from dataclasses import dataclass
from typing import Any, Dict, Generator, List, Optional

from .base import Blob, Bucket, BucketClient, BucketEntry, ClientError, PurePathy
from .base import (
Blob,
Bucket,
BucketClient,
BucketEntry,
ClientError,
PathyScanDir,
PurePathy,
)

try:
from google.api_core import exceptions as gcs_errors # type:ignore
Expand Down Expand Up @@ -174,49 +182,8 @@ def scandir( # type:ignore[override]
path: Optional[PurePathy] = None,
prefix: Optional[str] = None,
delimiter: Optional[str] = None,
) -> Generator[BucketEntryGCS, None, None]: # type:ignore[override]
assert self.client is not None, _MISSING_DEPS
continuation_token = None
if path is None or not path.root:
gcs_bucket: GCSNativeBucket
for gcs_bucket in self.list_buckets():
yield BucketEntryGCS(gcs_bucket.name, is_dir=True, raw=None)
return
sep = path._flavour.sep
bucket = self.lookup_bucket(path)
if bucket is None:
return
while True:
if continuation_token:
response = self.client.list_blobs(
bucket.name,
prefix=prefix,
delimiter=sep,
page_token=continuation_token,
)
else:
response = self.client.list_blobs(
bucket.name, prefix=prefix, delimiter=sep
)
for page in response.pages:
for folder in list(page.prefixes):
full_name = folder[:-1] if folder.endswith(sep) else folder
name = full_name.split(sep)[-1]
if name:
yield BucketEntryGCS(name, is_dir=True, raw=None)
for item in page:
name = item.name.split(sep)[-1]
if name:
yield BucketEntryGCS(
name=name,
is_dir=False,
size=item.size,
last_modified=item.updated.timestamp(),
raw=item,
)
if response.next_page_token is None:
break
continuation_token = response.next_page_token
) -> PathyScanDir:
return _GCSScanDir(client=self, path=path, prefix=prefix, delimiter=delimiter)

def list_blobs(
self,
Expand Down Expand Up @@ -255,3 +222,51 @@ def list_blobs(
if response.next_page_token is None:
break
continuation_token = response.next_page_token


class _GCSScanDir(PathyScanDir):
    """Scan a Google Cloud Storage client and yield one entry per child.

    With no path (or a path without a root) the buckets reported by the
    native client are yielded as directory entries. Otherwise blobs under the
    stored prefix are listed with the path separator as delimiter: returned
    prefixes become directory entries and blobs become file entries.
    """

    _client: BucketClientGCS

    def scandir(self) -> Generator[BucketEntryGCS, None, None]:
        """Yield a BucketEntryGCS for each bucket, folder prefix, or blob."""
        assert self._client.client is not None, _MISSING_DEPS
        native = self._client.client
        target = self._path
        if target is None or not target.root:
            native_bucket: GCSNativeBucket
            for native_bucket in native.list_buckets():
                yield BucketEntryGCS(native_bucket.name, is_dir=True, raw=None)
            return
        sep = target._flavour.sep
        bucket = self._client.lookup_bucket(target)
        if bucket is None:
            # Unknown bucket: nothing to list.
            return
        page_token = None
        while True:
            # Only pass page_token once a previous response supplied one.
            paging = {"page_token": page_token} if page_token else {}
            response = native.list_blobs(
                bucket.name, prefix=self._prefix, delimiter=sep, **paging
            )
            for page in response.pages:
                for folder in list(page.prefixes):
                    # Drop the trailing separator, then keep the last segment.
                    if folder.endswith(sep):
                        folder = folder[:-1]
                    dir_name = folder.split(sep)[-1]
                    if dir_name:
                        yield BucketEntryGCS(dir_name, is_dir=True, raw=None)
                for item in page:
                    file_name = item.name.rsplit(sep, 1)[-1]
                    if not file_name:
                        continue
                    yield BucketEntryGCS(
                        name=file_name,
                        is_dir=False,
                        size=item.size,
                        last_modified=item.updated.timestamp(),
                        raw=item,
                    )
            page_token = response.next_page_token
            if page_token is None:
                break
1 change: 1 addition & 0 deletions tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -763,6 +763,7 @@ def test_api_raises_with_no_known_bucket_clients_for_a_scheme(temp_folder):
assert isinstance(accessor.client(path), BucketClientFS)


@pytest.mark.skip("requires: https://github.com/explosion/thinc/pull/465")
def test_api_export_spacy_model(temp_folder):
"""spaCy model loading is one of the things we need to support"""
use_fs(temp_folder)
Expand Down
5 changes: 0 additions & 5 deletions tox.ini

This file was deleted.