Skip to content

Commit

Permalink
refactor: rename PureGCSPath to PurePathy
Browse files (browse the repository at this point in the history)
Rename the pure-path base class so that its name is consistent with the Pathy naming.

BREAKING CHANGE: PureGCSPath has been renamed to PurePathy; code that imports or references PureGCSPath must be updated to use PurePathy.
  • Loading branch information
justindujardin committed Aug 21, 2020
1 parent 0429358 commit 5632f26
Show file tree
Hide file tree
Showing 11 changed files with 149 additions and 162 deletions.
2 changes: 1 addition & 1 deletion pathy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
ClientBucket,
ClientError,
Pathy,
PureGCSPath,
PurePathy,
FluidPath,
clear_fs_cache,
get_fs_cache,
Expand Down
4 changes: 2 additions & 2 deletions pathy/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from google.cloud import storage

from . import gcs
from .base import PureGCSPath, PathType
from .base import PurePathy, PathType
from .client import (
BucketClient,
BucketEntry,
Expand Down Expand Up @@ -112,7 +112,7 @@ def clear_fs_cache(force: bool = False) -> None:
FluidPath = Union["Pathy", Path]


class Pathy(Path, PureGCSPath):
class Pathy(Path, PurePathy):
"""Path subclass for GCS service.
Write files to and read files from the GCS service using pathlib.Path
Expand Down
18 changes: 4 additions & 14 deletions pathy/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,31 +39,21 @@ def make_uri(self, path):
_gcs_flavour = _GCSFlavour()


class PureGCSPath(PurePath):
"""
PurePath subclass for GCS service.
GCS is not a file-system but we can look at it like a POSIX system.
"""
class PurePathy(PurePath):
"""PurePath subclass for bucket storage."""

_flavour = _gcs_flavour
__slots__ = ()

@property
def bucket(self):
"""
bucket property
return a new instance of only the bucket path
"""
"""Return a new instance of only the bucket path."""
self._absolute_path_validation()
return type(self)(f"{self.drive}//{self.root}")

@property
def key(self):
"""
key property
return a new instance of only the key path
"""
"""Return a new instance of only the key path."""
self._absolute_path_validation()
key = self._flavour.sep.join(self.parts[2:])
if not key or len(self.parts) < 2:
Expand Down
24 changes: 12 additions & 12 deletions pathy/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import smart_open

from .base import PureGCSPath
from .base import PurePathy

__all__ = (
"BucketStat",
Expand Down Expand Up @@ -118,16 +118,16 @@ def delete_blobs(self, blobs: List[ClientBlob]) -> None:
class BucketClient:
"""Base class for a client that interacts with a bucket-based storage system."""

def make_uri(self, path: PureGCSPath) -> str:
def make_uri(self, path: PurePathy) -> str:
return path.as_uri()

def is_dir(self, path: PureGCSPath) -> bool:
def is_dir(self, path: PurePathy) -> bool:
return any(self.list_blobs(path, prefix=path.prefix))

def rmdir(self, path: PureGCSPath) -> None:
def rmdir(self, path: PurePathy) -> None:
return None

def exists(self, path: PureGCSPath) -> bool:
def exists(self, path: PurePathy) -> bool:
# Because we want all the parents of a valid blob (e.g. "directory" in
# "directory/foo.file") to return True, we enumerate the blobs with a prefix
# and compare the object names to see if they match a substring of the path
Expand All @@ -141,7 +141,7 @@ def exists(self, path: PureGCSPath) -> bool:

def open(
self,
path: PureGCSPath,
path: PurePathy,
*,
mode="r",
buffering=-1,
Expand All @@ -160,18 +160,18 @@ def open(
ignore_ext=True,
)

def lookup_bucket(self, path: PureGCSPath) -> Optional[ClientBucket]:
def lookup_bucket(self, path: PurePathy) -> Optional[ClientBucket]:
raise NotImplementedError(_SUBCLASS_MUST_IMPLEMENT)

def get_bucket(self, path: PureGCSPath) -> ClientBucket:
def get_bucket(self, path: PurePathy) -> ClientBucket:
raise NotImplementedError(_SUBCLASS_MUST_IMPLEMENT)

def list_buckets(self) -> Generator[ClientBucket, None, None]:
raise NotImplementedError(_SUBCLASS_MUST_IMPLEMENT)

def list_blobs(
self,
path: PureGCSPath,
path: PurePathy,
prefix: Optional[str] = None,
delimiter: Optional[str] = None,
include_dirs: bool = False,
Expand All @@ -180,14 +180,14 @@ def list_blobs(

def scandir(
self,
path: PureGCSPath = None,
path: PurePathy = None,
prefix: Optional[str] = None,
delimiter: Optional[str] = None,
) -> Generator[BucketEntry[BucketType, BucketBlobType], None, None]:
raise NotImplementedError(_SUBCLASS_MUST_IMPLEMENT)

def create_bucket(self, path: PureGCSPath) -> ClientBucket:
def create_bucket(self, path: PurePathy) -> ClientBucket:
raise NotImplementedError(_SUBCLASS_MUST_IMPLEMENT)

def delete_bucket(self, path: PureGCSPath) -> None:
def delete_bucket(self, path: PurePathy) -> None:
raise NotImplementedError(_SUBCLASS_MUST_IMPLEMENT)
28 changes: 14 additions & 14 deletions pathy/file.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from dataclasses import dataclass, field
from typing import Optional, List, Generator, cast
from .client import BucketClient, ClientBucket, ClientBlob, ClientError, BucketEntry
from .base import PureGCSPath
from .base import PurePathy
import pathlib
import shutil
import os
Expand Down Expand Up @@ -79,32 +79,32 @@ class BucketClientFS(BucketClient):
# Root to store file-system buckets as children of
root: pathlib.Path

def make_uri(self, path: PureGCSPath):
def make_uri(self, path: PurePathy):
uri = super().make_uri(path)
return uri.replace("gs://", "file:///")

def full_path(self, path: PureGCSPath) -> pathlib.Path:
def full_path(self, path: PurePathy) -> pathlib.Path:
if path.root is None:
raise ValueError(f"Invalid bucket name for path: {path}")
full_path = self.root.absolute() / path.root
if path.key is not None:
full_path = full_path / path.key
return full_path

def exists(self, path: PureGCSPath) -> bool:
def exists(self, path: PurePathy) -> bool:
"""Return True if the path exists as a file or folder on disk"""
return self.full_path(path).exists()

def is_dir(self, path: PureGCSPath) -> bool:
def is_dir(self, path: PurePathy) -> bool:
return self.full_path(path).is_dir()

def rmdir(self, path: PureGCSPath) -> None:
def rmdir(self, path: PurePathy) -> None:
full_path = self.full_path(path)
return shutil.rmtree(str(full_path))

def open(
self,
path: PureGCSPath,
path: PurePathy,
*,
mode="r",
buffering=-1,
Expand All @@ -129,13 +129,13 @@ def open(
newline=newline,
)

def make_uri(self, path: PureGCSPath) -> str:
def make_uri(self, path: PurePathy) -> str:
if not path.root:
raise ValueError(f"cannot make a URI to an invalid bucket: {path.root}")
result = f"file://{self.root.absolute() / path.root / path.key}"
return result

def create_bucket(self, path: PureGCSPath) -> ClientBucket:
def create_bucket(self, path: PurePathy) -> ClientBucket:
if not path.root:
raise ValueError(f"Invalid bucket name: {path.root}")
bucket_path: pathlib.Path = self.root / path.root
Expand All @@ -144,19 +144,19 @@ def create_bucket(self, path: PureGCSPath) -> ClientBucket:
bucket_path.mkdir(parents=True, exist_ok=True)
return ClientBucketFS(str(path.root), bucket=bucket_path)

def delete_bucket(self, path: PureGCSPath) -> None:
def delete_bucket(self, path: PurePathy) -> None:
bucket_path: pathlib.Path = self.root / str(path.root)
if bucket_path.exists():
shutil.rmtree(bucket_path)

def lookup_bucket(self, path: PureGCSPath) -> Optional[ClientBucketFS]:
def lookup_bucket(self, path: PurePathy) -> Optional[ClientBucketFS]:
if path.root:
bucket_path: pathlib.Path = self.root / path.root
if bucket_path.exists():
return ClientBucketFS(str(path.root), bucket=bucket_path)
return None

def get_bucket(self, path: PureGCSPath) -> ClientBucketFS:
def get_bucket(self, path: PurePathy) -> ClientBucketFS:
if not path.root:
raise ValueError(f"path has an invalid bucket_name: {path.root}")
bucket_path: pathlib.Path = self.root / path.root
Expand All @@ -171,7 +171,7 @@ def list_buckets(self, **kwargs) -> Generator[ClientBucketFS, None, None]:

def scandir(
self,
path: Optional[PureGCSPath] = None,
path: Optional[PurePathy] = None,
prefix: Optional[str] = None,
delimiter: Optional[str] = None,
) -> Generator[BucketEntryFS, None, None]:
Expand Down Expand Up @@ -210,7 +210,7 @@ def scandir(

def list_blobs(
self,
path: PureGCSPath,
path: PurePathy,
prefix: Optional[str] = None,
delimiter: Optional[str] = None,
include_dirs: bool = False,
Expand Down
16 changes: 8 additions & 8 deletions pathy/gcs.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from dataclasses import dataclass, field
from typing import Optional, List, Generator
from .client import BucketClient, ClientBucket, ClientBlob, ClientError, BucketEntry
from .base import PureGCSPath
from .base import PurePathy

try:
from google.cloud import storage
Expand Down Expand Up @@ -71,17 +71,17 @@ def delete_blobs(self, blobs: List[ClientBlobGCS]) -> None:
class BucketClientGCS(BucketClient):
client: storage.Client = field(default_factory=lambda: storage.Client())

def make_uri(self, path: PureGCSPath):
def make_uri(self, path: PurePathy):
return str(path)

def create_bucket(self, path: PureGCSPath) -> ClientBucket:
def create_bucket(self, path: PurePathy) -> ClientBucket:
return self.client.create_bucket(path.root)

def delete_bucket(self, path: PureGCSPath) -> None:
def delete_bucket(self, path: PurePathy) -> None:
bucket = self.client.get_bucket(path.root)
bucket.delete()

def lookup_bucket(self, path: PureGCSPath) -> Optional[ClientBucketGCS]:
def lookup_bucket(self, path: PurePathy) -> Optional[ClientBucketGCS]:
try:
native_bucket = self.client.lookup_bucket(path.root)
if native_bucket is not None:
Expand All @@ -90,7 +90,7 @@ def lookup_bucket(self, path: PureGCSPath) -> Optional[ClientBucketGCS]:
pass
return None

def get_bucket(self, path: PureGCSPath) -> ClientBucketGCS:
def get_bucket(self, path: PurePathy) -> ClientBucketGCS:
try:
native_bucket = self.client.lookup_bucket(path.root)
if native_bucket is not None:
Expand All @@ -104,7 +104,7 @@ def list_buckets(self, **kwargs) -> Generator[ClientBucket, None, None]:

def scandir(
self,
path: Optional[PureGCSPath] = None,
path: Optional[PurePathy] = None,
prefix: Optional[str] = None,
delimiter: Optional[str] = None,
include_raw: bool = False,
Expand Down Expand Up @@ -152,7 +152,7 @@ def scandir(

def list_blobs(
self,
path: PureGCSPath,
path: PurePathy,
prefix: Optional[str] = None,
delimiter: Optional[str] = None,
include_dirs: bool = False,
Expand Down
4 changes: 2 additions & 2 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
BucketStat,
FluidPath,
Pathy,
PureGCSPath,
PurePathy,
clear_fs_cache,
get_fs_client,
use_fs,
Expand All @@ -29,7 +29,7 @@


def test_api_path_support():
assert PureGCSPath in Pathy.mro() # type: ignore
assert PurePathy in Pathy.mro() # type: ignore
assert Path in Pathy.mro() # type: ignore


Expand Down
Loading

0 comments on commit 5632f26

Please sign in to comment.