diff --git a/.github/workflows/build_repocard_examples.yaml b/.github/workflows/build_repocard_examples.yaml index d74f44b5f7..f966c7d826 100644 --- a/.github/workflows/build_repocard_examples.yaml +++ b/.github/workflows/build_repocard_examples.yaml @@ -6,7 +6,7 @@ on: - main env: - HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGINGFACE_PRODUCTION_USER_TOKEN }} + HF_TOKEN: ${{ secrets.HUGGINGFACE_PRODUCTION_USER_TOKEN }} jobs: build: diff --git a/docs/source/de/guides/manage-cache.md b/docs/source/de/guides/manage-cache.md index b3cc7f8c5e..e7b9c8a48b 100644 --- a/docs/source/de/guides/manage-cache.md +++ b/docs/source/de/guides/manage-cache.md @@ -21,7 +21,7 @@ Das Cache-System ist wie folgt aufgebaut: Der `<CACHE_DIR>` ist normalerweise das Home-Verzeichnis Ihres Benutzers. Es kann jedoch mit dem `cache_dir`-Argument in allen Methoden oder durch Angabe der Umgebungsvariablen -`HF_HOME` oder `HUGGINGFACE_HUB_CACHE` angepasst werden. +`HF_HOME` oder `HF_HUB_CACHE` angepasst werden. Modelle, Datensätze und Räume teilen eine gemeinsame Wurzel. Jedes dieser Repositories enthält den Repository-Typ, den Namensraum (Organisation oder Benutzername), diff --git a/docs/source/en/guides/manage-cache.md b/docs/source/en/guides/manage-cache.md index 9b9b1dd718..aad3be96c8 100644 --- a/docs/source/en/guides/manage-cache.md +++ b/docs/source/en/guides/manage-cache.md @@ -19,9 +19,7 @@ The caching system is designed as follows: ├─ <SPACES> ``` -The `<CACHE_DIR>` is usually your user's home directory. However, it is customizable with the -`cache_dir` argument on all methods, or by specifying either `HF_HOME` or -`HUGGINGFACE_HUB_CACHE` environment variable. +The `<CACHE_DIR>` is usually your user's home directory. However, it is customizable with the `cache_dir` argument on all methods, or by specifying either the `HF_HOME` or the `HF_HUB_CACHE` environment variable. Models, datasets and spaces share a common root. 
Each of these repositories contains the repository type, the namespace (organization or username) if it exists and the diff --git a/docs/source/en/package_reference/environment_variables.md b/docs/source/en/package_reference/environment_variables.md index 504b564bd9..3bb022d196 100644 --- a/docs/source/en/package_reference/environment_variables.md +++ b/docs/source/en/package_reference/environment_variables.md @@ -36,14 +36,14 @@ and the cache will be stored in this folder. Defaults to `"~/.cache/huggingface"` unless [XDG_CACHE_HOME](#xdgcachehome) is set. -### HUGGINGFACE_HUB_CACHE +### HF_HUB_CACHE To configure where repositories from the Hub will be cached locally (models, datasets and spaces). Defaults to `"$HF_HOME/hub"` (e.g. `"~/.cache/huggingface/hub"` by default). -### HUGGINGFACE_ASSETS_CACHE +### HF_ASSETS_CACHE To configure where [assets](../guides/manage-cache#caching-assets) created by downstream libraries will be cached locally. Those assets can be preprocessed data, files downloaded from GitHub, @@ -51,14 +51,14 @@ logs,... Defaults to `"$HF_HOME/assets"` (e.g. `"~/.cache/huggingface/assets"` by default). -### HUGGING_FACE_HUB_TOKEN +### HF_TOKEN To configure the User Access Token to authenticate to the Hub. If set, this value will overwrite the token stored on the machine (in `"$HF_HOME/token"`). See [login reference](../package_reference/login) for more details. -### HUGGINGFACE_HUB_VERBOSITY +### HF_HUB_VERBOSITY Set the verbosity level of the `huggingface_hub`'s logger. Must be one of `{"debug", "info", "warning", "error", "critical"}`. @@ -150,6 +150,18 @@ By default, `huggingface_hub` uses the Python-based `requests.get` and `requests Please note that using `hf_transfer` comes with certain limitations. Since it is not purely Python-based, debugging errors may be challenging. Additionally, `hf_transfer` lacks several user-friendly features such as progress bars, resumable downloads and proxies. 
These omissions are intentional to maintain the simplicity and speed of the Rust logic. Consequently, `hf_transfer` is not enabled by default in `huggingface_hub`. +## Deprecated environment variables + +In order to standardize all environment variables within the Hugging Face ecosystem, some variables have been marked as deprecated. Although they remain functional, they no longer take precedence over their replacements. The following table outlines the deprecated variables and their corresponding alternatives: + + +| Deprecated Variable | Replacement | +| --- | --- | +| `HUGGINGFACE_HUB_CACHE` | `HF_HUB_CACHE` | +| `HUGGINGFACE_ASSETS_CACHE` | `HF_ASSETS_CACHE` | +| `HUGGING_FACE_HUB_TOKEN` | `HF_TOKEN` | +| `HUGGINGFACE_HUB_VERBOSITY` | `HF_HUB_VERBOSITY` | + ## From external tools Some environment variables are not specific to `huggingface_hub` but are still taken into account when they are set. diff --git a/src/huggingface_hub/_snapshot_download.py b/src/huggingface_hub/_snapshot_download.py index 252baf4f73..e30fee0a6b 100644 --- a/src/huggingface_hub/_snapshot_download.py +++ b/src/huggingface_hub/_snapshot_download.py @@ -8,8 +8,8 @@ from .constants import ( DEFAULT_ETAG_TIMEOUT, DEFAULT_REVISION, + HF_HUB_CACHE, HF_HUB_ENABLE_HF_TRANSFER, - HUGGINGFACE_HUB_CACHE, REPO_TYPES, ) from .file_download import REGEX_COMMIT_HASH, hf_hub_download, repo_folder_name @@ -147,7 +147,7 @@ def snapshot_download( """ if cache_dir is None: - cache_dir = HUGGINGFACE_HUB_CACHE + cache_dir = HF_HUB_CACHE if revision is None: revision = DEFAULT_REVISION if isinstance(cache_dir, Path): diff --git a/src/huggingface_hub/constants.py b/src/huggingface_hub/constants.py index ee1cb98ce6..38ccd2ad9d 100644 --- a/src/huggingface_hub/constants.py +++ b/src/huggingface_hub/constants.py @@ -89,9 +89,14 @@ def _as_int(value: Optional[str]) -> Optional[int]: default_cache_path = os.path.join(hf_cache_home, "hub") default_assets_cache_path = os.path.join(hf_cache_home, "assets") +# Legacy 
env variables HUGGINGFACE_HUB_CACHE = os.getenv("HUGGINGFACE_HUB_CACHE", default_cache_path) HUGGINGFACE_ASSETS_CACHE = os.getenv("HUGGINGFACE_ASSETS_CACHE", default_assets_cache_path) +# New env variables +HF_HUB_CACHE = os.getenv("HF_HUB_CACHE", HUGGINGFACE_HUB_CACHE) +HF_ASSETS_CACHE = os.getenv("HF_ASSETS_CACHE", HUGGINGFACE_ASSETS_CACHE) + HF_HUB_OFFLINE = _is_true(os.environ.get("HF_HUB_OFFLINE") or os.environ.get("TRANSFORMERS_OFFLINE")) # Opt-out from telemetry requests diff --git a/src/huggingface_hub/file_download.py b/src/huggingface_hub/file_download.py index 49a55cd121..03147af1d3 100644 --- a/src/huggingface_hub/file_download.py +++ b/src/huggingface_hub/file_download.py @@ -28,6 +28,7 @@ DEFAULT_REQUEST_TIMEOUT, DEFAULT_REVISION, ENDPOINT, + HF_HUB_CACHE, HF_HUB_DISABLE_SYMLINKS_WARNING, HF_HUB_DOWNLOAD_TIMEOUT, HF_HUB_ENABLE_HF_TRANSFER, @@ -36,7 +37,7 @@ HUGGINGFACE_HEADER_X_LINKED_ETAG, HUGGINGFACE_HEADER_X_LINKED_SIZE, HUGGINGFACE_HEADER_X_REPO_COMMIT, - HUGGINGFACE_HUB_CACHE, + HUGGINGFACE_HUB_CACHE, # noqa: F401 # for backward compatibility REPO_ID_SEPARATOR, REPO_TYPES, REPO_TYPES_URL_PREFIXES, @@ -100,7 +101,7 @@ def are_symlinks_supported(cache_dir: Union[str, Path, None] = None) -> bool: """ # Defaults to HF cache if cache_dir is None: - cache_dir = HUGGINGFACE_HUB_CACHE + cache_dir = HF_HUB_CACHE cache_dir = str(Path(cache_dir).expanduser().resolve()) # make it unique # Check symlink compatibility only once (per cache directory) at first time use @@ -324,7 +325,7 @@ def filename_to_url( ) if cache_dir is None: - cache_dir = HUGGINGFACE_HUB_CACHE + cache_dir = HF_HUB_CACHE if isinstance(cache_dir, Path): cache_dir = str(cache_dir) @@ -644,7 +645,7 @@ def cached_download( ) if cache_dir is None: - cache_dir = HUGGINGFACE_HUB_CACHE + cache_dir = HF_HUB_CACHE if isinstance(cache_dir, Path): cache_dir = str(cache_dir) @@ -1157,7 +1158,7 @@ def hf_hub_download( ) if cache_dir is None: - cache_dir = HUGGINGFACE_HUB_CACHE + cache_dir = 
HF_HUB_CACHE if revision is None: revision = DEFAULT_REVISION if isinstance(cache_dir, Path): @@ -1524,7 +1525,7 @@ def try_to_load_from_cache( if repo_type not in REPO_TYPES: raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(REPO_TYPES)}") if cache_dir is None: - cache_dir = HUGGINGFACE_HUB_CACHE + cache_dir = HF_HUB_CACHE object_id = repo_id.replace("/", "--") repo_cache = os.path.join(cache_dir, f"{repo_type}s--{object_id}") diff --git a/src/huggingface_hub/utils/_cache_assets.py b/src/huggingface_hub/utils/_cache_assets.py index d6a6421e3b..e5d435df9b 100644 --- a/src/huggingface_hub/utils/_cache_assets.py +++ b/src/huggingface_hub/utils/_cache_assets.py @@ -15,7 +15,7 @@ from pathlib import Path from typing import Union -from ..constants import HUGGINGFACE_ASSETS_CACHE +from ..constants import HF_ASSETS_CACHE def cached_assets_path( @@ -91,7 +91,7 @@ def cached_assets_path( assets_dir (`str`, `Path`, *optional*): Path to the folder where assets are cached. This must not be the same folder where Hub files are cached. Defaults to `HF_HOME / "assets"` if not provided. - Can also be set with `HUGGINGFACE_ASSETS_CACHE` environment variable. + Can also be set with `HF_ASSETS_CACHE` environment variable. Returns: Path to the cache folder (`Path`). 
@@ -115,7 +115,7 @@ def cached_assets_path( """ # Resolve assets_dir if assets_dir is None: - assets_dir = HUGGINGFACE_ASSETS_CACHE + assets_dir = HF_ASSETS_CACHE assets_dir = Path(assets_dir).expanduser().resolve() # Avoid names that could create path issues diff --git a/src/huggingface_hub/utils/_cache_manager.py b/src/huggingface_hub/utils/_cache_manager.py index e9368811f3..896eee724b 100644 --- a/src/huggingface_hub/utils/_cache_manager.py +++ b/src/huggingface_hub/utils/_cache_manager.py @@ -21,7 +21,7 @@ from pathlib import Path from typing import Dict, FrozenSet, List, Literal, Optional, Set, Union -from ..constants import HUGGINGFACE_HUB_CACHE +from ..constants import HF_HUB_CACHE from . import logging @@ -580,21 +580,18 @@ def scan_cache_dir(cache_dir: Optional[Union[str, Path]] = None) -> HFCacheInfo: Returns: a [`~HFCacheInfo`] object. """ if cache_dir is None: - cache_dir = HUGGINGFACE_HUB_CACHE + cache_dir = HF_HUB_CACHE cache_dir = Path(cache_dir).expanduser().resolve() if not cache_dir.exists(): raise CacheNotFound( - f"Cache directory not found: {cache_dir}. Please use `cache_dir`" - " argument or set `HUGGINGFACE_HUB_CACHE` environment variable.", + f"Cache directory not found: {cache_dir}. Please use `cache_dir` argument or set `HF_HUB_CACHE` environment variable.", cache_dir=cache_dir, ) if cache_dir.is_file(): raise ValueError( - f"Scan cache expects a directory but found a file: {cache_dir}. Please use" - " `cache_dir` argument or set `HUGGINGFACE_HUB_CACHE` environment" - " variable." + f"Scan cache expects a directory but found a file: {cache_dir}. Please use `cache_dir` argument or set `HF_HUB_CACHE` environment variable." 
) repos: Set[CachedRepoInfo] = set() diff --git a/src/huggingface_hub/utils/_hf_folder.py b/src/huggingface_hub/utils/_hf_folder.py index 77daced7e8..f0c83da45a 100644 --- a/src/huggingface_hub/utils/_hf_folder.py +++ b/src/huggingface_hub/utils/_hf_folder.py @@ -46,7 +46,7 @@ def get_token(cls) -> Optional[str]: """ Get token or None if not existent. - Note that a token can be also provided using the `HUGGING_FACE_HUB_TOKEN` environment variable. + Note that a token can be also provided using the `HF_TOKEN` environment variable. Token is saved in the huggingface home folder. You can configure it by setting the `HF_HOME` environment variable. Previous location was `~/.huggingface/token`. @@ -63,7 +63,7 @@ def get_token(cls) -> Optional[str]: pass # 1. Is it set by environment variable ? - token: Optional[str] = os.environ.get("HUGGING_FACE_HUB_TOKEN") + token: Optional[str] = os.environ.get("HF_TOKEN", os.environ.get("HUGGING_FACE_HUB_TOKEN"))  # legacy fallback if token is not None: token = token.replace("\r", "").replace("\n", "").strip() return token diff --git a/src/huggingface_hub/utils/_runtime.py b/src/huggingface_hub/utils/_runtime.py index f08c8e4003..117e630396 100644 --- a/src/huggingface_hub/utils/_runtime.py +++ b/src/huggingface_hub/utils/_runtime.py @@ -305,8 +305,8 @@ def dump_environment_info() -> Dict[str, Any]: # Environment variables info["ENDPOINT"] = constants.ENDPOINT - info["HUGGINGFACE_HUB_CACHE"] = constants.HUGGINGFACE_HUB_CACHE - info["HUGGINGFACE_ASSETS_CACHE"] = constants.HUGGINGFACE_ASSETS_CACHE + info["HF_HUB_CACHE"] = constants.HF_HUB_CACHE + info["HF_ASSETS_CACHE"] = constants.HF_ASSETS_CACHE info["HF_TOKEN_PATH"] = constants.HF_TOKEN_PATH info["HF_HUB_OFFLINE"] = constants.HF_HUB_OFFLINE info["HF_HUB_DISABLE_TELEMETRY"] = constants.HF_HUB_DISABLE_TELEMETRY diff --git a/src/huggingface_hub/utils/logging.py b/src/huggingface_hub/utils/logging.py index 187641d03b..dc4d6a0054 100644 --- a/src/huggingface_hub/utils/logging.py +++ b/src/huggingface_hub/utils/logging.py @@ -50,18 +50,16 @@ def 
_get_library_root_logger() -> logging.Logger: def _get_default_logging_level(): """ - If HUGGINGFACE_HUB_VERBOSITY env var is set to one of the valid choices - return that as the new default level. If it is not - fall back to - `_default_log_level` + If `HF_HUB_VERBOSITY` env var is set to one of the valid choices return that as the new default level. If it is not + - fall back to `_default_log_level` """ - env_level_str = os.getenv("HUGGINGFACE_HUB_VERBOSITY", None) + env_level_str = os.getenv("HF_HUB_VERBOSITY", os.getenv("HUGGINGFACE_HUB_VERBOSITY"))  # legacy fallback if env_level_str: if env_level_str in log_levels: return log_levels[env_level_str] else: logging.getLogger().warning( - f"Unknown option HUGGINGFACE_HUB_VERBOSITY={env_level_str}, " - f"has to be one of: { ', '.join(log_levels.keys()) }" + f"Unknown option HF_HUB_VERBOSITY={env_level_str}, has to be one of: { ', '.join(log_levels.keys()) }" ) return _default_log_level diff --git a/tests/test_cache_no_symlinks.py b/tests/test_cache_no_symlinks.py index 1703cc31a2..1f7004c185 100644 --- a/tests/test_cache_no_symlinks.py +++ b/tests/test_cache_no_symlinks.py @@ -6,7 +6,7 @@ import pytest from huggingface_hub import hf_hub_download, scan_cache_dir -from huggingface_hub.constants import CONFIG_NAME, HUGGINGFACE_HUB_CACHE +from huggingface_hub.constants import CONFIG_NAME, HF_HUB_CACHE from huggingface_hub.file_download import are_symlinks_supported from .testing_constants import TOKEN @@ -20,7 +20,7 @@ class TestCacheLayoutIfSymlinksNotSupported(unittest.TestCase): @patch( "huggingface_hub.file_download._are_symlinks_supported_in_dir", - {HUGGINGFACE_HUB_CACHE: True}, + {HF_HUB_CACHE: True}, ) def test_are_symlinks_supported_default(self) -> None: self.assertTrue(are_symlinks_supported()) diff --git a/tests/test_utils_assets.py b/tests/test_utils_assets.py index 0726dda4ae..bf5413928a 100644 --- a/tests/test_utils_assets.py +++ b/tests/test_utils_assets.py @@ -55,9 +55,9 @@ def test_cached_assets_path_forbidden_symbols(self) -> None: def 
test_cached_assets_path_default_assets_dir(self) -> None: with patch( - "huggingface_hub.utils._cache_assets.HUGGINGFACE_ASSETS_CACHE", + "huggingface_hub.utils._cache_assets.HF_ASSETS_CACHE", self.cache_dir, - ): # Uses environment variable from HUGGINGFACE_ASSETS_CACHE + ): # Uses environment variable from HF_ASSETS_CACHE self.assertEqual( cached_assets_path(library_name="datasets"), self.cache_dir / "datasets" / "default" / "default", diff --git a/tests/test_utils_hf_folder.py b/tests/test_utils_hf_folder.py index ce0448ec49..c101ab0dee 100644 --- a/tests/test_utils_hf_folder.py +++ b/tests/test_utils_hf_folder.py @@ -41,7 +41,7 @@ def test_token_workflow(self): self.assertEqual(HfFolder.get_token(), None) # test TOKEN in env self.assertEqual(HfFolder.get_token(), None) - with unittest.mock.patch.dict(os.environ, {"HUGGING_FACE_HUB_TOKEN": token}): + with unittest.mock.patch.dict(os.environ, {"HF_TOKEN": token}): self.assertEqual(HfFolder.get_token(), token) def test_token_in_old_path(self):