Skip to content

Commit

Permalink
feat: add FluidPath and GCSPath.fluid method
Browse files Browse the repository at this point in the history
GCSPath wants to work with many kinds of paths, and it's not always clear upfront what kind of path a string represents. If you're on a local file system, the path "/usr/bin/something" may be totally valid, but as a GCSPath it isn't valid because there's no service scheme attached to it; a valid GCSPath looks like "gs://bucket/usr/bin/something".

FluidPath is a Union of pathlib.Path and GCSPath, which allows type-checking of a path without needing explicit knowledge of what kind of path it is until that knowledge is needed.

*note* I originally thought of using "UnionPath" instead of "FluidPath" but the intellisense for completing "GCSPath.union" was very crowded, and a helper should be easy to type with completion.
  • Loading branch information
justindujardin committed Apr 24, 2020
1 parent 98760fc commit 3393226
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 0 deletions.
1 change: 1 addition & 0 deletions gcspath/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
ClientError,
GCSPath,
PureGCSPath,
FluidPath,
clear_fs_cache,
get_fs_cache,
get_fs_client,
Expand Down
25 changes: 25 additions & 0 deletions gcspath/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,9 @@ def clear_fs_cache(force: bool = False) -> None:
shutil.rmtree(str(resolved))


# Union of the two path types that GCSPath.fluid is annotated to return.
# "GCSPath" is quoted as a forward reference because the class is defined
# further down in this module.
FluidPath = Union["GCSPath", Path]


class GCSPath(Path, PureGCSPath):
"""Path subclass for GCS service.
Expand All @@ -131,6 +134,28 @@ def _init(self: PathType, template=None):
else:
self._accessor = template._accessor

@classmethod
def fluid(cls: PathType, path_candidate: Union[str, FluidPath]) -> FluidPath:
    """Infer a pathlib.Path or GCSPath from a string or an existing path.

    The candidate is first parsed as a GCSPath. If the parsed path's root is
    "/" or empty, the candidate is re-parsed with pathlib.Path and that
    object is returned; otherwise the GCSPath itself is returned.

    The declared return type is the `FluidPath` union, so only the API
    shared by both path types type-checks on the result. To use members
    specific to one class, narrow the type first, e.g.

        # Narrow the type to a specific class
        assert isinstance(path, GCSPath), "must be GCSPath"
        # Use a member specific to that class
        print(path.prefix)
    """
    as_gcs: FluidPath = GCSPath(path_candidate)
    # Any root other than "/" or "" means the GCSPath parse is kept as-is.
    if as_gcs.root not in ("/", ""):
        return as_gcs
    return Path(path_candidate)

@classmethod
def from_bucket(cls: PathType, bucket_name: str) -> "GCSPath":
"""Helper to convert a bucket name into a GCSPath without needing
Expand Down
11 changes: 11 additions & 0 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
BucketClientFS,
BucketsAccessor,
BucketStat,
FluidPath,
GCSPath,
PureGCSPath,
clear_fs_cache,
Expand All @@ -38,6 +39,16 @@ def test_api_is_path_instance(with_adapter):
assert isinstance(blob, Path)


@pytest.mark.parametrize("adapter", TEST_ADAPTERS)
def test_api_fluid(with_adapter, bucket: str):
    """Check that GCSPath.fluid picks GCSPath for gs:// input and Path otherwise."""
    # GCSPath subclasses Path, so isinstance(path, Path) alone cannot tell the
    # two apart; the local-path cases also assert the result is NOT a GCSPath.
    path: FluidPath = GCSPath.fluid(f"gs://{bucket}/fake-key")
    assert isinstance(path, GCSPath)

    # Relative local path
    path = GCSPath.fluid("foo/bar.txt")
    assert isinstance(path, Path)
    assert not isinstance(path, GCSPath)

    # Absolute local path
    path = GCSPath.fluid("/dev/null")
    assert isinstance(path, Path)
    assert not isinstance(path, GCSPath)


@pytest.mark.parametrize("adapter", TEST_ADAPTERS)
def test_api_path_to_local(with_adapter, bucket: str):
root: GCSPath = GCSPath.from_bucket(bucket) / "to_local"
Expand Down

0 comments on commit 3393226

Please sign in to comment.