From 17be1c5e3007a75142f200c3035e7d127165cb48 Mon Sep 17 00:00:00 2001 From: Lucain Date: Mon, 6 Nov 2023 14:44:44 +0100 Subject: [PATCH] Add support for progress bars in hf_transfer uploads (#1804) * Add support for progress bars in hf_transfer uploads * apply suggestion --- docs/source/en/guides/download.md | 2 +- docs/source/en/guides/upload.md | 9 +++++-- src/huggingface_hub/lfs.py | 45 +++++++++++++++++++++---------- 3 files changed, 39 insertions(+), 17 deletions(-) diff --git a/docs/source/en/guides/download.md b/docs/source/en/guides/download.md index c6cf40ab71..459cdd7130 100644 --- a/docs/source/en/guides/download.md +++ b/docs/source/en/guides/download.md @@ -191,7 +191,7 @@ If you are running on a machine with high bandwidth, you can increase your downl -Progress bars are supported when downloading with `hf_transfer` starting from version `0.1.4`. Consider upgrading (`pip install -U hf-transfer`) if you plan to enable faster downloads. +Progress bars are supported in `hf_transfer` starting from version `0.1.4`. Consider upgrading (`pip install -U hf-transfer`) if you plan to enable faster downloads. diff --git a/docs/source/en/guides/upload.md b/docs/source/en/guides/upload.md index 14b72be0d1..81ee761ff8 100644 --- a/docs/source/en/guides/upload.md +++ b/docs/source/en/guides/upload.md @@ -460,8 +460,13 @@ be re-uploaded twice but checking it client-side can still save some time. - **Use `hf_transfer`**: this is a Rust-based [library](https://github.com/huggingface/hf_transfer) meant to speed up uploads on machines with very high bandwidth. To use it, you must install it (`pip install hf_transfer`) and enable it by setting `HF_HUB_ENABLE_HF_TRANSFER=1` as an environment variable. You can then use `huggingface_hub` normally. -Disclaimer: this is a power user tool. It is tested and production-ready but lacks user-friendly features like progress -bars or advanced error handling. For more details, please refer to this [section](https://huggingface.co/docs/huggingface_hub/hf_transfer). +Disclaimer: this is a power user tool. It is tested and production-ready but lacks user-friendly features like advanced error handling or proxies. For more details, please refer to this [section](https://huggingface.co/docs/huggingface_hub/hf_transfer). + + + +Progress bars are supported in `hf_transfer` starting from version `0.1.4`. Consider upgrading (`pip install -U hf-transfer`) if you plan to enable faster uploads. + + ## (legacy) Upload files with Git LFS diff --git a/src/huggingface_hub/lfs.py b/src/huggingface_hub/lfs.py index c3c890044d..1aad3250e5 100644 --- a/src/huggingface_hub/lfs.py +++ b/src/huggingface_hub/lfs.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Git LFS related type definitions and utilities""" +import inspect import io import os import re @@ -29,7 +30,7 @@ from huggingface_hub.constants import ENDPOINT, HF_HUB_ENABLE_HF_TRANSFER, REPO_TYPES_URL_PREFIXES from huggingface_hub.utils import get_session -from .utils import get_token_to_send, hf_raise_for_status, http_backoff, logging, validate_hf_hub_args +from .utils import get_token_to_send, hf_raise_for_status, http_backoff, logging, tqdm, validate_hf_hub_args from .utils.sha import sha256, sha_fileobj @@ -389,21 +390,37 @@ def _upload_parts_hf_transfer( " not available in your environment. Try `pip install hf_transfer`." ) - try: - return multipart_upload( - file_path=operation.path_or_fileobj, - parts_urls=sorted_parts_urls, - chunk_size=chunk_size, - max_files=128, - parallel_failures=127, # could be removed - max_retries=5, + supports_callback = "callback" in inspect.signature(multipart_upload).parameters + if not supports_callback: + warnings.warn( + "You are using an outdated version of `hf_transfer`. Consider upgrading to latest version to enable progress bars using `pip install -U hf_transfer`." ) - except Exception as e: - raise RuntimeError( - "An error occurred while uploading using `hf_transfer`. Consider disabling HF_HUB_ENABLE_HF_TRANSFER for" - " better error handling." - ) from e + total = operation.upload_info.size + desc = operation.path_in_repo + if len(desc) > 40: + desc = f"(…){desc[-40:]}" + disable = bool(logger.getEffectiveLevel() == logging.NOTSET) + + with tqdm(unit="B", unit_scale=True, total=total, initial=0, desc=desc, disable=disable) as progress: + try: + output = multipart_upload( + file_path=operation.path_or_fileobj, + parts_urls=sorted_parts_urls, + chunk_size=chunk_size, + max_files=128, + parallel_failures=127, # could be removed + max_retries=5, + **({"callback": progress.update} if supports_callback else {}), + ) + except Exception as e: + raise RuntimeError( + "An error occurred while uploading using `hf_transfer`. Consider disabling HF_HUB_ENABLE_HF_TRANSFER for" + " better error handling." + ) from e + if not supports_callback: + progress.update(total) + return output class SliceFileObj(AbstractContextManager): """