Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding the ability to configure the timeout of get request #1720

Merged
merged 12 commits into from
Oct 12, 2023
8 changes: 8 additions & 0 deletions docs/source/en/package_reference/environment_variables.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,14 @@ small files will be duplicated to ease user experience while bigger files are sy

For more details, see the [download guide](../guides/download#download-files-to-local-folder).

### HF_HUB_ETAG_TIMEOUT

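Integer value defining the timeout used when fetching a file's ETag from the Hub. It is applied system-wide: when set to a value other than the default (10), it overrides the `etag_timeout` argument passed to the download functions.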
Shahafgo marked this conversation as resolved.
Show resolved Hide resolved

### HF_HUB_DOWNLOAD_TIMEOUT

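Integer value defining the timeout of the GET request used when downloading a file. It is applied system-wide: when set to a value other than the default (10), it overrides the `timeout` argument of the download request.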
Shahafgo marked this conversation as resolved.
Show resolved Hide resolved

## Boolean values

The following environment variables expect a boolean value. The variable will be considered
Expand Down
7 changes: 6 additions & 1 deletion src/huggingface_hub/_snapshot_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@
from .constants import (
DEFAULT_REVISION,
HF_HUB_ENABLE_HF_TRANSFER,
HF_HUB_ETAG_TIMEOUT,
HUGGINGFACE_HUB_CACHE,
REPO_TYPES,
DEFAULT_ETAG_TIMEOUT,
)
from .file_download import REGEX_COMMIT_HASH, hf_hub_download, repo_folder_name
from .hf_api import HfApi
Expand All @@ -34,7 +36,7 @@ def snapshot_download(
library_version: Optional[str] = None,
user_agent: Optional[Union[Dict, str]] = None,
proxies: Optional[Dict] = None,
etag_timeout: float = 10,
etag_timeout: float = DEFAULT_ETAG_TIMEOUT,
resume_download: bool = False,
force_download: bool = False,
token: Optional[Union[bool, str]] = None,
Expand Down Expand Up @@ -145,6 +147,9 @@ def snapshot_download(

</Tip>
"""
if HF_HUB_ETAG_TIMEOUT != DEFAULT_ETAG_TIMEOUT:
etag_timeout = HF_HUB_ETAG_TIMEOUT
Shahafgo marked this conversation as resolved.
Show resolved Hide resolved

Wauplin marked this conversation as resolved.
Show resolved Hide resolved
if cache_dir is None:
cache_dir = HUGGINGFACE_HUB_CACHE
if revision is None:
Expand Down
9 changes: 9 additions & 0 deletions src/huggingface_hub/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ def _as_int(value: Optional[str]) -> Optional[int]:
FLAX_WEIGHTS_NAME = "flax_model.msgpack"
CONFIG_NAME = "config.json"
REPOCARD_NAME = "README.md"
# Default timeouts, used as default argument values by the download helpers:
# `etag_timeout` (ETag fetch), `http_get(timeout=...)` (file download) and
# `_request_wrapper(timeout=...)` (generic request).
# NOTE(review): presumably expressed in seconds -- confirm against the HTTP client call.
DEFAULT_ETAG_TIMEOUT = 10
DEFAULT_DOWNLOAD_TIMEOUT = 10
DEFAULT_REQUEST_TIMEOUT = 10

# Git-related constants

Expand Down Expand Up @@ -130,6 +133,12 @@ def _as_int(value: Optional[str]) -> Optional[int]:
_as_int(os.environ.get("HF_HUB_LOCAL_DIR_AUTO_SYMLINK_THRESHOLD")) or 5 * 1024 * 1024
)

# System-wide override of the ETag-fetch timeout, read from the `HF_HUB_ETAG_TIMEOUT`
# environment variable. Falls back to `DEFAULT_ETAG_TIMEOUT` when `_as_int` returns a
# falsy value.
# NOTE(review): because of the `or`, an explicit value of 0 is also replaced by the
# default (and `_as_int` presumably returns None when the variable is unset) -- confirm.
HF_HUB_ETAG_TIMEOUT: int = _as_int(os.environ.get("HF_HUB_ETAG_TIMEOUT")) or DEFAULT_ETAG_TIMEOUT

# System-wide override of the file-download GET request timeout, read from the
# `HF_HUB_DOWNLOAD_TIMEOUT` environment variable. Falls back to
# `DEFAULT_DOWNLOAD_TIMEOUT` when `_as_int` returns a falsy value (same caveat as above
# for an explicit 0).
HF_HUB_DOWNLOAD_TIMEOUT: int = _as_int(os.environ.get("HF_HUB_DOWNLOAD_TIMEOUT")) or DEFAULT_DOWNLOAD_TIMEOUT

# List frameworks that are handled by the InferenceAPI service. Useful to scan endpoints and check which models are
# deployed and running. Since 95% of the models are using the top 4 frameworks listed below, we scan only those by
# default. We still keep the full list of supported frameworks in case we want to scan all of them.
Expand Down
22 changes: 18 additions & 4 deletions src/huggingface_hub/file_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,15 @@
HUGGINGFACE_HEADER_X_LINKED_ETAG,
HUGGINGFACE_HEADER_X_LINKED_SIZE,
HUGGINGFACE_HEADER_X_REPO_COMMIT,
HF_HUB_ETAG_TIMEOUT,
HF_HUB_DOWNLOAD_TIMEOUT,
HUGGINGFACE_HUB_CACHE,
REPO_ID_SEPARATOR,
REPO_TYPES,
REPO_TYPES_URL_PREFIXES,
DEFAULT_DOWNLOAD_TIMEOUT,
DEFAULT_ETAG_TIMEOUT,
DEFAULT_REQUEST_TIMEOUT,
)
from .utils import (
EntryNotFoundError,
Expand Down Expand Up @@ -373,7 +378,7 @@ def _request_wrapper(
max_retries: int = 0,
base_wait_time: float = 0.5,
max_wait_time: float = 2,
timeout: Optional[float] = 10.0,
timeout: Optional[float] = DEFAULT_REQUEST_TIMEOUT,
follow_relative_redirects: bool = False,
**params,
) -> requests.Response:
Expand Down Expand Up @@ -478,13 +483,16 @@ def http_get(
proxies=None,
resume_size: float = 0,
headers: Optional[Dict[str, str]] = None,
timeout: Optional[float] = 10.0,
timeout: Optional[float] = DEFAULT_DOWNLOAD_TIMEOUT,
max_retries: int = 0,
expected_size: Optional[int] = None,
):
"""
Download a remote file. Do not gobble up errors, and will return errors tailored to the Hugging Face Hub.
"""
if HF_HUB_DOWNLOAD_TIMEOUT != DEFAULT_DOWNLOAD_TIMEOUT:
timeout = HF_HUB_DOWNLOAD_TIMEOUT

Shahafgo marked this conversation as resolved.
Show resolved Hide resolved
if not resume_size:
if HF_HUB_ENABLE_HF_TRANSFER:
try:
Expand Down Expand Up @@ -576,7 +584,7 @@ def cached_download(
force_download: bool = False,
force_filename: Optional[str] = None,
proxies: Optional[Dict] = None,
etag_timeout: float = 10,
etag_timeout: float = DEFAULT_ETAG_TIMEOUT,
resume_download: bool = False,
token: Union[bool, str, None] = None,
local_files_only: bool = False,
Expand Down Expand Up @@ -656,6 +664,9 @@ def cached_download(

</Tip>
"""
if HF_HUB_ETAG_TIMEOUT != DEFAULT_ETAG_TIMEOUT:
etag_timeout = HF_HUB_ETAG_TIMEOUT

Shahafgo marked this conversation as resolved.
Show resolved Hide resolved
if not legacy_cache_layout:
warnings.warn(
"'cached_download' is the legacy way to download files from the HF hub, please consider upgrading to"
Expand Down Expand Up @@ -1005,7 +1016,7 @@ def hf_hub_download(
force_download: bool = False,
force_filename: Optional[str] = None,
proxies: Optional[Dict] = None,
etag_timeout: float = 10,
etag_timeout: float = DEFAULT_ETAG_TIMEOUT,
resume_download: bool = False,
token: Union[bool, str, None] = None,
local_files_only: bool = False,
Expand Down Expand Up @@ -1138,6 +1149,9 @@ def hf_hub_download(

</Tip>
"""
if HF_HUB_ETAG_TIMEOUT != DEFAULT_ETAG_TIMEOUT:
etag_timeout = HF_HUB_ETAG_TIMEOUT

Shahafgo marked this conversation as resolved.
Show resolved Hide resolved
if force_filename is not None:
warnings.warn(
"The `force_filename` parameter is deprecated as a new caching system, "
Expand Down
12 changes: 10 additions & 2 deletions src/huggingface_hub/hf_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@
)
from .constants import (
DEFAULT_REVISION,
HF_HUB_ETAG_TIMEOUT,
DEFAULT_ETAG_TIMEOUT,
ENDPOINT,
REGEX_COMMIT_OID,
REPO_TYPE_MODEL,
Expand Down Expand Up @@ -4041,7 +4043,7 @@ def hf_hub_download(
force_download: bool = False,
force_filename: Optional[str] = None,
proxies: Optional[Dict] = None,
etag_timeout: float = 10,
etag_timeout: float = DEFAULT_ETAG_TIMEOUT,
resume_download: bool = False,
local_files_only: bool = False,
legacy_cache_layout: bool = False,
Expand Down Expand Up @@ -4164,6 +4166,9 @@ def hf_hub_download(
"""
from .file_download import hf_hub_download

if HF_HUB_ETAG_TIMEOUT != DEFAULT_ETAG_TIMEOUT:
etag_timeout = HF_HUB_ETAG_TIMEOUT

Shahafgo marked this conversation as resolved.
Show resolved Hide resolved
return hf_hub_download(
repo_id=repo_id,
filename=filename,
Expand Down Expand Up @@ -4198,7 +4203,7 @@ def snapshot_download(
local_dir: Union[str, Path, None] = None,
local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto",
proxies: Optional[Dict] = None,
etag_timeout: float = 10,
etag_timeout: float = DEFAULT_ETAG_TIMEOUT,
resume_download: bool = False,
force_download: bool = False,
local_files_only: bool = False,
Expand Down Expand Up @@ -4296,6 +4301,9 @@ def snapshot_download(
"""
from ._snapshot_download import snapshot_download

if HF_HUB_ETAG_TIMEOUT != DEFAULT_ETAG_TIMEOUT:
etag_timeout = HF_HUB_ETAG_TIMEOUT

Shahafgo marked this conversation as resolved.
Show resolved Hide resolved
return snapshot_download(
repo_id=repo_id,
repo_type=repo_type,
Expand Down
2 changes: 2 additions & 0 deletions src/huggingface_hub/utils/_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,8 @@ def dump_environment_info() -> Dict[str, Any]:
info["HF_HUB_DISABLE_EXPERIMENTAL_WARNING"] = constants.HF_HUB_DISABLE_EXPERIMENTAL_WARNING
info["HF_HUB_DISABLE_IMPLICIT_TOKEN"] = constants.HF_HUB_DISABLE_IMPLICIT_TOKEN
info["HF_HUB_ENABLE_HF_TRANSFER"] = constants.HF_HUB_ENABLE_HF_TRANSFER
info["HF_HUB_ETAG_TIMEOUT"] = constants.HF_HUB_ETAG_TIMEOUT
info["HF_HUB_DOWNLOAD_TIMEOUT"] = constants.HF_HUB_DOWNLOAD_TIMEOUT
Comment on lines +318 to +319
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Really like this, thank you!


print("\nCopy-and-paste the text below in your GitHub issue.\n")
print("\n".join([f"- {prop}: {val}" for prop, val in info.items()]) + "\n")
Expand Down