Skip to content

Commit

Permalink
Set correct permissions on blob file (#1220)
Browse files Browse the repository at this point in the history
* Set correct permissions on blob file

* Hack to get current umask thread-safe
  • Loading branch information
Wauplin authored Nov 25, 2022
1 parent f33668d commit 5a85b24
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 2 deletions.
31 changes: 29 additions & 2 deletions src/huggingface_hub/file_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
import os
import re
import shutil
import stat
import tempfile
import uuid
import warnings
from contextlib import contextmanager
from dataclasses import dataclass
Expand Down Expand Up @@ -747,7 +749,7 @@ def _resumable_file_manager() -> Generator[io.BufferedWriter, None, None]:
)

logger.info("storing %s in cache at %s", url, cache_path)
os.replace(temp_file.name, cache_path)
_chmod_and_replace(temp_file.name, cache_path)

if force_filename is None:
logger.info("creating metadata file for %s", cache_path)
Expand Down Expand Up @@ -1249,7 +1251,7 @@ def _resumable_file_manager() -> Generator[io.BufferedWriter, None, None]:
)

logger.info("storing %s in cache at %s", url, blob_path)
os.replace(temp_file.name, blob_path)
_chmod_and_replace(temp_file.name, blob_path)

logger.info("creating pointer to %s from %s", blob_path, pointer_path)
_create_relative_symlink(blob_path, pointer_path, new_blob=True)
Expand Down Expand Up @@ -1401,3 +1403,28 @@ def _int_or_none(value: Optional[str]) -> Optional[int]:
return int(value) # type: ignore
except (TypeError, ValueError):
return None


def _chmod_and_replace(src: str, dst: str) -> None:
"""Set correct permission before moving a blob from tmp directory to cache dir.
Do not take into account the `umask` from the process as there is no convenient way
to get it that is thread-safe.
See:
- About umask: https://docs.python.org/3/library/os.html#os.umask
- Thread-safety: https://stackoverflow.com/a/70343066
- About solution: https://github.com/huggingface/huggingface_hub/pull/1220#issuecomment-1326211591
- Fix issue: https://github.com/huggingface/huggingface_hub/issues/1141
- Fix issue: https://github.com/huggingface/huggingface_hub/issues/1215
"""
# Get umask by creating a temporary file in the cached repo folder.
tmp_file = Path(dst).parent.parent / f"tmp_{uuid.uuid4()}"
try:
tmp_file.touch()
cache_dir_mode = Path(tmp_file).stat().st_mode
os.chmod(src, stat.S_IMODE(cache_dir_mode))
finally:
tmp_file.unlink()

os.replace(src, dst)
20 changes: 20 additions & 0 deletions tests/test_file_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.
import os
import re
import stat
import unittest
from pathlib import Path
from tempfile import TemporaryDirectory
Expand Down Expand Up @@ -242,6 +243,25 @@ def test_dataset_lfs_object(self):
(url, '"95aa6a52d5d6a735563366753ca50492a658031da74f301ac5238b03966972c9"'),
)

def test_hf_hub_download_custom_cache_permission(self):
"""Checks `hf_hub_download` respect the cache dir permission.
Regression test for #1141 #1215.
https://github.com/huggingface/huggingface_hub/issues/1141
https://github.com/huggingface/huggingface_hub/issues/1215
"""
with TemporaryDirectory() as tmpdir:
# Equivalent to umask u=rwx,g=r,o=
previous_umask = os.umask(0o037)
try:
filepath = hf_hub_download(
DUMMY_RENAMED_OLD_MODEL_ID, "config.json", cache_dir=tmpdir
)
# Permissions are honored (640: u=rw,g=r,o=)
self.assertEqual(stat.S_IMODE(os.stat(filepath).st_mode), 0o640)
finally:
os.umask(previous_umask)

def test_download_from_a_renamed_repo_with_hf_hub_download(self):
"""Checks `hf_hub_download` works also on a renamed repo.
Expand Down

0 comments on commit 5a85b24

Please sign in to comment.