Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cloud implementation of shutil.copy and shutil.copytree #142

Merged
merged 11 commits into from
May 29, 2021
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -179,11 +179,14 @@ Most methods and properties from `pathlib.Path` are supported except for the one
| `symlink_to` | ❌ | ❌ | ❌ |
| `with_stem` | ❌ | ❌ | ❌ |
| `cloud_prefix` | ✅ | ✅ | ✅ |
| `copy` | ✅ | ✅ | ✅ |
| `copytree` | ✅ | ✅ | ✅ |
| `download_to` | ✅ | ✅ | ✅ |
| `etag` | ✅ | ✅ | ✅ |
| `fspath` | ✅ | ✅ | ✅ |
| `is_valid_cloudpath` | ✅ | ✅ | ✅ |
| `rmtree` | ✅ | ✅ | ✅ |
| `upload_from` | ✅ | ✅ | ✅ |
| `blob` | ✅ | ❌ | ✅ |
| `bucket` | ❌ | ✅ | ✅ |
| `container` | ✅ | ❌ | ❌ |
Expand Down
7 changes: 5 additions & 2 deletions cloudpathlib/azure/azblobclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,9 @@ def _list_dir(

yield self.CloudPath(f"az://{cloud_path.container}/{o.name}")

def _move_file(self, src: AzureBlobPath, dst: AzureBlobPath) -> AzureBlobPath:
def _move_file(
self, src: AzureBlobPath, dst: AzureBlobPath, remove_src: bool = True
) -> AzureBlobPath:
# just a touch, so "REPLACE" metadata
if src == dst:
blob_client = self.service_client.get_blob_client(
Expand All @@ -189,7 +191,8 @@ def _move_file(self, src: AzureBlobPath, dst: AzureBlobPath) -> AzureBlobPath:

target.start_copy_from_url(source.url)

self._remove(src)
if remove_src:
self._remove(src)

return dst

Expand Down
4 changes: 3 additions & 1 deletion cloudpathlib/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,9 @@ def _list_dir(
pass

@abc.abstractmethod
def _move_file(self, src: BoundedCloudPath, dst: BoundedCloudPath) -> BoundedCloudPath:
def _move_file(
self, src: BoundedCloudPath, dst: BoundedCloudPath, remove_src: bool = True
pjbull marked this conversation as resolved.
Show resolved Hide resolved
) -> BoundedCloudPath:
pass

@abc.abstractmethod
Expand Down
121 changes: 109 additions & 12 deletions cloudpathlib/cloudpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -585,12 +585,12 @@ def read_text(self):
return self._dispatch_to_local_cache_path("read_text")

# =========== public cloud methods, not in pathlib ===============
def download_to(self, destination: Union[str, os.PathLike]):
def download_to(self, destination: Union[str, os.PathLike]) -> Union[str, os.PathLike]:
pjbull marked this conversation as resolved.
Show resolved Hide resolved
destination = Path(destination)
if self.is_file():
if destination.is_dir():
destination = destination / self.name
self.client._download_file(self, destination)
return self.client._download_file(self, destination)
else:
destination.mkdir(exist_ok=True)
for f in self.iterdir():
Expand All @@ -601,6 +601,8 @@ def download_to(self, destination: Union[str, os.PathLike]):
rel_dest = str(f)[len(rel) :]
f.download_to(destination / rel_dest)

return destination

def rmtree(self):
"""Delete an entire directory tree."""
if self.is_file():
Expand All @@ -609,6 +611,90 @@ def rmtree(self):
)
self.client._remove(self)

def upload_from(
    self, source: Union[str, os.PathLike], force_overwrite_to_cloud: bool = False
) -> "CloudPath":
    """Upload a local file or directory tree to this cloud path.

    Args:
        source: Local path to upload. A directory is walked recursively and each
            entry is uploaded to ``self / <entry name>``.
        force_overwrite_to_cloud: Forwarded unchanged to ``_upload_file_to_cloud``
            for every file that is uploaded.

    Returns:
        This cloud path (``self``), also when the file was actually written to
        ``self / source.name``.
    """
    local_source = Path(source)

    if not local_source.is_dir():
        # Single file: when this cloud path is an existing directory, the file is
        # placed inside it under its own name; otherwise this path is the target.
        if self.exists() and self.is_dir():
            target = self / local_source.name
        else:
            target = self

        target._upload_file_to_cloud(
            local_source, force_overwrite_to_cloud=force_overwrite_to_cloud
        )
        return self

    # Directory: recurse into every entry, mirroring the local names in the cloud.
    for entry in local_source.iterdir():
        (self / entry.name).upload_from(entry, force_overwrite_to_cloud=force_overwrite_to_cloud)

    return self
pjbull marked this conversation as resolved.
Show resolved Hide resolved

def copy(
    self,
    destination: Union[str, os.PathLike, "CloudPath"],
    force_overwrite_to_cloud: bool = False,
) -> Union[str, os.PathLike, "CloudPath"]:
    """Copy self to destination folder or file, if self is a file.

    Args:
        destination: Where to copy to. Anything that is not a ``CloudPath`` is
            treated as a local path and handed to ``download_to``. A ``CloudPath``
            that is an existing directory receives the file under ``self.name``.
        force_overwrite_to_cloud: When False, a same-client copy refuses to replace
            a destination whose modification time is not older than the source's.
            For cross-client copies the flag is forwarded to ``upload_from``.

    Returns:
        The result of ``download_to`` for local destinations, of the client's
        ``_move_file`` for same-client copies, or of ``upload_from`` otherwise.

    Raises:
        ValueError: If self does not exist or is not a file.
        OverwriteNewerCloudError: If a same-client destination exists, is not older
            than self, and ``force_overwrite_to_cloud`` is False.
    """
    if not self.exists() or not self.is_file():
        raise ValueError(
            f"Path {self} should be a file. To copy a directory tree use the method copytree."
        )

    if not isinstance(destination, CloudPath):
        return self.download_to(destination)

    # if same client, use cloud-native _move_file on client to avoid downloading
    elif self.client is destination.client:
        # An existing directory means "copy into it", exactly as the local and
        # cross-client branches behave; resolve the real target before the
        # staleness check so the check compares file against file.
        if destination.exists() and destination.is_dir():
            destination = destination / self.name

        if (
            not force_overwrite_to_cloud
            and destination.exists()
            and destination.stat().st_mtime >= self.stat().st_mtime
        ):
            raise OverwriteNewerCloudError(
                f"File ({destination}) is newer than ({self}). "
                f"To overwrite "
                f"pass `force_overwrite_to_cloud=True`."
            )

        # remove_src=False turns the client's move primitive into a copy
        return self.client._move_file(self, destination, remove_src=False)

    else:
        # different clients: upload from self.fspath instead of a cloud-native copy
        if not destination.exists() or destination.is_file():
            return destination.upload_from(
                self.fspath, force_overwrite_to_cloud=force_overwrite_to_cloud
            )
        else:
            return (destination / self.name).upload_from(
                self.fspath, force_overwrite_to_cloud=force_overwrite_to_cloud
            )

def copytree(
    self, destination: "CloudPath", force_overwrite_to_cloud: bool = False
) -> Union[os.PathLike, "CloudPath"]:
    """Copy self to a directory, if self is a directory.

    The destination directory is created (including parents) when missing, then
    every entry yielded by ``self.iterdir()`` is copied into it under the same
    name: files through ``copy`` and directories through a recursive ``copytree``.

    Args:
        destination: Directory to copy into. It may already exist, but must not be
            an existing file.
        force_overwrite_to_cloud: Forwarded unchanged to every ``copy`` and
            ``copytree`` call made for the entries.

    Returns:
        ``destination``.

    Raises:
        ValueError: If self is not a directory, or if destination is an existing
            file.
    """
    if not self.is_dir():
        raise ValueError(
            f"Origin path {self} must be a directory. To copy a single file use the method copy."
        )
    if destination.exists() and destination.is_file():
        # f-string so the offending path is interpolated into the message
        raise ValueError(f"Destination path {destination} of copytree must be a directory.")

    destination.mkdir(parents=True, exist_ok=True)

    for subpath in self.iterdir():
        if subpath.is_file():
            subpath.copy(
                destination / subpath.name, force_overwrite_to_cloud=force_overwrite_to_cloud
            )
        elif subpath.is_dir():
            subpath.copytree(
                destination / subpath.name, force_overwrite_to_cloud=force_overwrite_to_cloud
            )

    return destination

# =========== private cloud methods ===============
@property
def _local(self):
Expand Down Expand Up @@ -673,11 +759,30 @@ def _refresh_cache(self, force_overwrite_from_cloud=False):
)

def _upload_local_to_cloud(self, force_overwrite_to_cloud: bool = False):
    """Upload the cache file at ``self._local`` and mark the cache as clean.

    Args:
        force_overwrite_to_cloud: Forwarded unchanged to ``_upload_file_to_cloud``.

    Returns:
        Whatever ``_upload_file_to_cloud`` returns for the cache file.

    Raises:
        ValueError: If ``self._local`` is a directory.
    """
    # Caching and uploading work on individual files only; a whole directory is
    # never synced through this path.
    if self._local.is_dir():
        raise ValueError("Only individual files can be uploaded to the cloud")

    result = self._upload_file_to_cloud(
        self._local, force_overwrite_to_cloud=force_overwrite_to_cloud
    )

    # Stamp the cache file with the cloud object's mtime (both atime and mtime).
    cloud_stats = self.stat()
    cloud_mtime = cloud_stats.st_mtime
    os.utime(self._local, times=(cloud_mtime, cloud_mtime))

    # The upload is done, so clear the dirty flag and drop the handle.
    self._dirty = False
    self._handle = None

    return result

def _upload_file_to_cloud(self, local_path, force_overwrite_to_cloud: bool = False):
"""Uploads file at `local_path` to the cloud if there is not a newer file
already there.
"""
try:
stats = self.stat()
except NoStatError:
Expand All @@ -686,22 +791,14 @@ def _upload_local_to_cloud(self, force_overwrite_to_cloud: bool = False):
# if cloud does not exist or local is newer or we are overwriting, do the upload
if (
not stats # cloud does not exist
or (self._local.stat().st_mtime > stats.st_mtime)
or (local_path.stat().st_mtime > stats.st_mtime)
or force_overwrite_to_cloud
):
self.client._upload_file(
self._local,
local_path,
self,
)

# force cache time to match cloud times
stats = self.stat()
os.utime(self._local, times=(stats.st_mtime, stats.st_mtime))

# reset dirty and handle now that this is uploaded
self._dirty = False
self._handle = None

return self

# cloud is newer and we are not overwriting
Expand Down
6 changes: 4 additions & 2 deletions cloudpathlib/gs/gsclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def _list_dir(self, cloud_path: GSPath, recursive=False) -> Iterable[GSPath]:

yield self.CloudPath(f"gs://{cloud_path.bucket}/{o.name}")

def _move_file(self, src: GSPath, dst: GSPath) -> GSPath:
def _move_file(self, src: GSPath, dst: GSPath, remove_src: bool = True) -> GSPath:
# just a touch, so "REPLACE" metadata
if src == dst:
bucket = self.client.bucket(src.bucket)
Expand All @@ -177,7 +177,9 @@ def _move_file(self, src: GSPath, dst: GSPath) -> GSPath:

src_blob = src_bucket.get_blob(src.blob)
src_bucket.copy_blob(src_blob, dst_bucket, dst.blob)
src_blob.delete()

if remove_src:
src_blob.delete()

return dst

Expand Down
10 changes: 8 additions & 2 deletions cloudpathlib/local/localclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,15 @@ def _list_dir(self, cloud_path: "LocalPath", recursive=False) -> Iterable["Local
def _md5(self, cloud_path: "LocalPath") -> str:
return md5(self._cloud_path_to_local(cloud_path).read_bytes()).hexdigest()

def _move_file(self, src: "LocalPath", dst: "LocalPath") -> "LocalPath":
def _move_file(
self, src: "LocalPath", dst: "LocalPath", remove_src: bool = True
) -> "LocalPath":
self._cloud_path_to_local(dst).parent.mkdir(exist_ok=True, parents=True)
self._cloud_path_to_local(src).replace(self._cloud_path_to_local(dst))

if remove_src:
self._cloud_path_to_local(src).replace(self._cloud_path_to_local(dst))
else:
shutil.copy(self._cloud_path_to_local(src), self._cloud_path_to_local(dst))
return dst

def _remove(self, cloud_path: "LocalPath") -> None:
Expand Down
5 changes: 3 additions & 2 deletions cloudpathlib/s3/s3client.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def _list_dir(self, cloud_path: S3Path, recursive=False) -> Iterable[S3Path]:
for result_key in result.get("Contents", []):
yield self.CloudPath(f"s3://{cloud_path.bucket}/{result_key.get('Key')}")

def _move_file(self, src: S3Path, dst: S3Path) -> S3Path:
def _move_file(self, src: S3Path, dst: S3Path, remove_src: bool = True) -> S3Path:
# just a touch, so "REPLACE" metadata
if src == dst:
o = self.s3.Object(src.bucket, src.key)
Expand All @@ -168,7 +168,8 @@ def _move_file(self, src: S3Path, dst: S3Path) -> S3Path:
target = self.s3.Object(dst.bucket, dst.key)
target.copy({"Bucket": src.bucket, "Key": src.key})

self._remove(src)
if remove_src:
self._remove(src)
return dst

def _remove(self, cloud_path: S3Path) -> None:
Expand Down
Loading