Skip to content

Commit

Permalink
Env vars for force overwrite functions (#437)
Browse files Browse the repository at this point in the history
* Env vars for force overwrite

* change history

* Defaults for force_overwrite_to_cloud

* overwrite to tests

* make sure times not equal in tests

* update test

* more snoozing

* sleep cloud

* sleepier

* update timing

* Sleep after delete

* try force gc
  • Loading branch information
pjbull authored Aug 12, 2024
1 parent 08b018b commit f36309f
Show file tree
Hide file tree
Showing 4 changed files with 151 additions and 22 deletions.
1 change: 1 addition & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
- fix: use native `exists()` method in `GSClient`. (PR [#420](https://github.com/drivendataorg/cloudpathlib/pull/420))
- Enhancement: lazy instantiation of default client (PR [#432](https://github.com/drivendataorg/cloudpathlib/issues/432), Issue [#428](https://github.com/drivendataorg/cloudpathlib/issues/428))
- Adds existence check before downloading in `download_to` (Issue [#430](https://github.com/drivendataorg/cloudpathlib/issues/430), PR [#432](https://github.com/drivendataorg/cloudpathlib/pull/432))
- Add env vars `CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD` and `CLOUDPATHLIB_FORCE_OVERWRITE_TO_CLOUD`. (Issue [#393](https://github.com/drivendataorg/cloudpathlib/issues/393), PR [#437](https://github.com/drivendataorg/cloudpathlib/pull/437))

## v0.18.1 (2024-02-26)

Expand Down
55 changes: 35 additions & 20 deletions cloudpathlib/cloudpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ def __eq__(self, other: Any) -> bool:

def __fspath__(self) -> str:
if self.is_file():
self._refresh_cache(force_overwrite_from_cloud=False)
self._refresh_cache()
return str(self._local)

def __lt__(self, other: Any) -> bool:
Expand Down Expand Up @@ -549,8 +549,8 @@ def open(
encoding: Optional[str] = None,
errors: Optional[str] = None,
newline: Optional[str] = None,
force_overwrite_from_cloud: bool = False, # extra kwarg not in pathlib
force_overwrite_to_cloud: bool = False, # extra kwarg not in pathlib
force_overwrite_from_cloud: Optional[bool] = None, # extra kwarg not in pathlib
force_overwrite_to_cloud: Optional[bool] = None, # extra kwarg not in pathlib
) -> IO[Any]:
# if trying to call open on a directory that exists
if self.exists() and not self.is_file():
Expand Down Expand Up @@ -931,7 +931,7 @@ def rmtree(self) -> None:
def upload_from(
self,
source: Union[str, os.PathLike],
force_overwrite_to_cloud: bool = False,
force_overwrite_to_cloud: Optional[bool] = None,
) -> Self:
"""Upload a file or directory to the cloud path."""
source = Path(source)
Expand All @@ -956,24 +956,24 @@ def upload_from(
def copy(
self,
destination: Self,
force_overwrite_to_cloud: bool = False,
force_overwrite_to_cloud: Optional[bool] = None,
) -> Self: ...

@overload
def copy(
self,
destination: Path,
force_overwrite_to_cloud: bool = False,
force_overwrite_to_cloud: Optional[bool] = None,
) -> Path: ...

@overload
def copy(
self,
destination: str,
force_overwrite_to_cloud: bool = False,
force_overwrite_to_cloud: Optional[bool] = None,
) -> Union[Path, "CloudPath"]: ...

def copy(self, destination, force_overwrite_to_cloud=False):
def copy(self, destination, force_overwrite_to_cloud=None):
"""Copy self to destination folder of file, if self is a file."""
if not self.exists() or not self.is_file():
raise ValueError(
Expand All @@ -992,6 +992,11 @@ def copy(self, destination, force_overwrite_to_cloud=False):
if destination.exists() and destination.is_dir():
destination = destination / self.name

if force_overwrite_to_cloud is None:
force_overwrite_to_cloud = os.environ.get(
"CLOUDPATHLIB_FORCE_OVERWRITE_TO_CLOUD", "False"
).lower() in ["1", "true"]

if (
not force_overwrite_to_cloud
and destination.exists()
Expand Down Expand Up @@ -1019,27 +1024,27 @@ def copy(self, destination, force_overwrite_to_cloud=False):
def copytree(
self,
destination: Self,
force_overwrite_to_cloud: bool = False,
force_overwrite_to_cloud: Optional[bool] = None,
ignore: Optional[Callable[[str, Iterable[str]], Container[str]]] = None,
) -> Self: ...

@overload
def copytree(
self,
destination: Path,
force_overwrite_to_cloud: bool = False,
force_overwrite_to_cloud: Optional[bool] = None,
ignore: Optional[Callable[[str, Iterable[str]], Container[str]]] = None,
) -> Path: ...

@overload
def copytree(
self,
destination: str,
force_overwrite_to_cloud: bool = False,
force_overwrite_to_cloud: Optional[bool] = None,
ignore: Optional[Callable[[str, Iterable[str]], Container[str]]] = None,
) -> Union[Path, "CloudPath"]: ...

def copytree(self, destination, force_overwrite_to_cloud=False, ignore=None):
def copytree(self, destination, force_overwrite_to_cloud=None, ignore=None):
"""Copy self to a directory, if self is a directory."""
if not self.is_dir():
raise CloudPathNotADirectoryError(
Expand Down Expand Up @@ -1112,19 +1117,24 @@ def _new_cloudpath(self, path: Union[str, os.PathLike]) -> Self:

return self.client.CloudPath(path)

def _refresh_cache(self, force_overwrite_from_cloud: bool = False) -> None:
def _refresh_cache(self, force_overwrite_from_cloud: Optional[bool] = None) -> None:
try:
stats = self.stat()
except NoStatError:
# nothing to cache if the file does not exist; happens when creating
# new files that will be uploaded
return

if force_overwrite_from_cloud is None:
force_overwrite_from_cloud = os.environ.get(
"CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD", "False"
).lower() in ["1", "true"]

# if not exist or cloud newer
if (
not self._local.exists()
force_overwrite_from_cloud
or not self._local.exists()
or (self._local.stat().st_mtime < stats.st_mtime)
or force_overwrite_from_cloud
):
# ensure there is a home for the file
self._local.parent.mkdir(parents=True, exist_ok=True)
Expand All @@ -1138,7 +1148,7 @@ def _refresh_cache(self, force_overwrite_from_cloud: bool = False) -> None:
f"Local file ({self._local}) for cloud path ({self}) has been changed by your code, but "
f"is being requested for download from cloud. Either (1) push your changes to the cloud, "
f"(2) remove the local file, or (3) pass `force_overwrite_from_cloud=True` to "
f"overwrite."
f"overwrite; or set env var CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD=1."
)

# if local newer but not dirty, it was updated
Expand All @@ -1148,12 +1158,12 @@ def _refresh_cache(self, force_overwrite_from_cloud: bool = False) -> None:
f"Local file ({self._local}) for cloud path ({self}) is newer on disk, but "
f"is being requested for download from cloud. Either (1) push your changes to the cloud, "
f"(2) remove the local file, or (3) pass `force_overwrite_from_cloud=True` to "
f"overwrite."
f"overwrite; or set env var CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD=1."
)

def _upload_local_to_cloud(
self,
force_overwrite_to_cloud: bool = False,
force_overwrite_to_cloud: Optional[bool] = None,
) -> Self:
"""Uploads cache file at self._local to the cloud"""
# We should never try to be syncing entire directories; we should only
Expand All @@ -1178,11 +1188,16 @@ def _upload_local_to_cloud(
def _upload_file_to_cloud(
self,
local_path: Path,
force_overwrite_to_cloud: bool = False,
force_overwrite_to_cloud: Optional[bool] = None,
) -> Self:
"""Uploads file at `local_path` to the cloud if there is not a newer file
already there.
"""
if force_overwrite_to_cloud is None:
force_overwrite_to_cloud = os.environ.get(
"CLOUDPATHLIB_FORCE_OVERWRITE_TO_CLOUD", "False"
).lower() in ["1", "true"]

if force_overwrite_to_cloud:
# If we are overwriting no need to perform any checks, so we can save time
self.client._upload_file(
Expand Down Expand Up @@ -1210,7 +1225,7 @@ def _upload_file_to_cloud(
f"Local file ({self._local}) for cloud path ({self}) is newer in the cloud disk, but "
f"is being requested to be uploaded to the cloud. Either (1) redownload changes from the cloud or "
f"(2) pass `force_overwrite_to_cloud=True` to "
f"overwrite."
f"overwrite; or set env var CLOUDPATHLIB_FORCE_OVERWRITE_TO_CLOUD=1."
)

# =========== pydantic integration special methods ===============
Expand Down
6 changes: 5 additions & 1 deletion docs/docs/caching.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -398,10 +398,14 @@
"\n",
"The `CloudPath.open` method supports a `force_overwrite_from_cloud` kwarg to force overwriting your local version.\n",
"\n",
"You can make overwriting the cache with the cloud copy the default by setting the environment variable `CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD=1` or `CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD=True`.\n",
"\n",
"`OverwriteNewerCloudError`\n",
"This exception is raised if we are asked to upload a file, but the one on the cloud is newer than our local version. This likely means that a separate process has updated the cloud version, and we don't want to overwrite and lose that new data in the cloud.\n",
"\n",
"The `CloudPath.open` method supports a `force_overwrite_to_cloud` kwarg to force overwriting the cloud version.\n",
"\n",
"You can make overwriting the cloud copy with the local one being uploaded by setting the environment variable `CLOUDPATHLIB_FORCE_OVERWRITE_TO_CLOUD=1` or `CLOUDPATHLIB_FORCE_OVERWRITE_TO_CLOUD=True`.\n",
"\n"
]
},
Expand Down Expand Up @@ -773,7 +777,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
"version": "3.12.1"
},
"vscode": {
"interpreter": {
Expand Down
111 changes: 110 additions & 1 deletion tests/test_caching.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
import gc
import os
from time import sleep
from pathlib import Path

import pytest

from cloudpathlib.enums import FileCacheMode
from cloudpathlib.exceptions import InvalidConfigurationException
from cloudpathlib.exceptions import (
InvalidConfigurationException,
OverwriteNewerCloudError,
OverwriteNewerLocalError,
)
from tests.conftest import CloudProviderTestRig


Expand Down Expand Up @@ -344,6 +349,107 @@ def test_environment_variable_local_cache_dir(rig: CloudProviderTestRig, tmpdir)
os.environ["CLOUDPATHLIB_LOCAL_CACHE_DIR"] = original_env_setting


def test_environment_variables_force_overwrite_from(rig: CloudProviderTestRig, tmpdir):
# environment instantiation
original_env_setting = os.environ.get("CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD", "")

try:
# explicitly false overwrite
os.environ["CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD"] = "False"

p = rig.create_cloud_path("dir_0/file0_0.txt")
p._refresh_cache() # dl to cache
p._local.touch() # update mod time

with pytest.raises(OverwriteNewerLocalError):
p._refresh_cache()

for val in ["1", "True", "TRUE"]:
os.environ["CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD"] = val

p = rig.create_cloud_path("dir_0/file0_0.txt")

orig_mod_time = p.stat().st_mtime

p._refresh_cache() # dl to cache
p._local.touch() # update mod time

new_mod_time = p._local.stat().st_mtime

p._refresh_cache()
assert p._local.stat().st_mtime == orig_mod_time
assert p._local.stat().st_mtime < new_mod_time

finally:
os.environ["CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD"] = original_env_setting


def test_environment_variables_force_overwrite_to(rig: CloudProviderTestRig, tmpdir):
# environment instantiation
original_env_setting = os.environ.get("CLOUDPATHLIB_FORCE_OVERWRITE_TO_CLOUD", "")

try:
# explicitly false overwrite
os.environ["CLOUDPATHLIB_FORCE_OVERWRITE_TO_CLOUD"] = "False"

p = rig.create_cloud_path("dir_0/file0_0.txt")

new_local = Path((tmpdir / "new_content.txt").strpath)
new_local.write_text("hello")
new_also_cloud = rig.create_cloud_path("dir_0/another_cloud_file.txt")
new_also_cloud.write_text("newer")

# make cloud newer than local or other cloud file
os.utime(new_local, (new_local.stat().st_mtime - 2, new_local.stat().st_mtime - 2))

p.write_text("updated")

with pytest.raises(OverwriteNewerCloudError):
p._upload_file_to_cloud(new_local)

with pytest.raises(OverwriteNewerCloudError):
# copy short-circuits upload if same client, so we test separately

# raises if destination is newer
new_also_cloud.write_text("newest")
sleep(0.01)
p.copy(new_also_cloud)

for val in ["1", "True", "TRUE"]:
os.environ["CLOUDPATHLIB_FORCE_OVERWRITE_TO_CLOUD"] = val

p = rig.create_cloud_path("dir_0/file0_0.txt")

new_local.write_text("updated")

# make cloud newer than local
os.utime(new_local, (new_local.stat().st_mtime - 2, new_local.stat().st_mtime - 2))

p.write_text("updated")

orig_cloud_mod_time = p.stat().st_mtime

assert p.stat().st_mtime >= new_local.stat().st_mtime

# would raise if not set
sleep(1.01) # give time so not equal when rounded
p._upload_file_to_cloud(new_local)
assert p.stat().st_mtime > orig_cloud_mod_time # cloud now overwritten

new_also_cloud = rig.create_cloud_path("dir_0/another_cloud_file.txt")
sleep(1.01) # give time so not equal when rounded
new_also_cloud.write_text("newer")

new_cloud_mod_time = new_also_cloud.stat().st_mtime

assert p.stat().st_mtime < new_cloud_mod_time # would raise if not set
p.copy(new_also_cloud)
assert new_also_cloud.stat().st_mtime >= new_cloud_mod_time

finally:
os.environ["CLOUDPATHLIB_FORCE_OVERWRITE_TO_CLOUD"] = original_env_setting


def test_manual_cache_clearing(rig: CloudProviderTestRig):
# use client that we can delete rather than default
client = rig.client_class(**rig.required_client_kwargs)
Expand Down Expand Up @@ -395,6 +501,9 @@ def test_manual_cache_clearing(rig: CloudProviderTestRig):
del cp
del client

gc.collect() # force gc before asserting
sleep(0.5) # give time to delete

assert not local_cache_path.exists()
assert not client_cache_folder.exists()

Expand Down

0 comments on commit f36309f

Please sign in to comment.