Skip to content

Commit

Permalink
Fix get file size on lfs (#1188)
Browse files Browse the repository at this point in the history
* Fix get file size on LFS

* fix typos + add description
  • Loading branch information
Wauplin authored Nov 15, 2022
1 parent 131fd35 commit b1a11c2
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 4 deletions.
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/bug-report.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ body:
huggingface-cli env
```
If your are working in a notebook, please run it in a code cell:
If you are working in a notebook, please run it in a code cell:
```py
from huggingface_hub import dump_environment_info
Expand Down
2 changes: 1 addition & 1 deletion src/huggingface_hub/_commit_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,7 @@ def fetch_upload_modes(
if not path.endswith(".gitkeep"):
warnings.warn(
f"About to commit an empty file: '{path}'. Are you sure this is"
" intended ?"
" intended?"
)
upload_modes[path] = "regular"

Expand Down
1 change: 1 addition & 0 deletions src/huggingface_hub/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def _is_true_or_auto(value: Optional[str]) -> bool:
HUGGINGFACE_CO_URL_TEMPLATE = ENDPOINT + "/{repo_id}/resolve/{revision}/{filename}"
HUGGINGFACE_HEADER_X_REPO_COMMIT = "X-Repo-Commit"
HUGGINGFACE_HEADER_X_LINKED_ETAG = "X-Linked-Etag"
HUGGINGFACE_HEADER_X_LINKED_SIZE = "X-Linked-Size"

REPO_ID_SEPARATOR = "--"
# ^ this substring is not allowed in repo_ids on hf.co
Expand Down
9 changes: 7 additions & 2 deletions src/huggingface_hub/file_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
HF_HUB_DISABLE_SYMLINKS_WARNING,
HUGGINGFACE_CO_URL_TEMPLATE,
HUGGINGFACE_HEADER_X_LINKED_ETAG,
HUGGINGFACE_HEADER_X_LINKED_SIZE,
HUGGINGFACE_HEADER_X_REPO_COMMIT,
HUGGINGFACE_HUB_CACHE,
REPO_ID_SEPARATOR,
Expand Down Expand Up @@ -146,7 +147,8 @@ class HfFileMetadata:
location (`str`):
Location where to download the file. Can be a Hub url or not (CDN).
size (`int`):
Size of the file.
Size of the file. In case of an LFS file, contains the size of the actual
LFS file, not the pointer.
"""

commit_hash: Optional[str]
Expand Down Expand Up @@ -1384,7 +1386,10 @@ def get_hf_file_metadata(
# Do not use directly `url`, as `_request_wrapper` might have followed relative
# redirects.
location=r.headers.get("Location") or r.request.url, # type: ignore
size=_int_or_none(r.headers.get("Content-Length")),
size=_int_or_none(
r.headers.get(HUGGINGFACE_HEADER_X_LINKED_SIZE)
or r.headers.get("Content-Length")
),
)


Expand Down
7 changes: 7 additions & 0 deletions src/huggingface_hub/utils/_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,13 @@ def is_google_colab() -> bool:


def dump_environment_info() -> Dict[str, Any]:
"""Dump information about the machine to help debugging issues.
Similar helpers exist in:
- `datasets` (https://github.com/huggingface/datasets/blob/main/src/datasets/commands/env.py)
- `diffusers` (https://github.com/huggingface/diffusers/blob/main/src/diffusers/commands/env.py)
- `transformers` (https://github.com/huggingface/transformers/blob/main/src/transformers/commands/env.py)
"""
from huggingface_hub import HfFolder, whoami
from huggingface_hub.utils import list_credential_helpers

Expand Down
11 changes: 11 additions & 0 deletions tests/test_file_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,17 @@ def test_get_hf_file_metadata_from_a_renamed_repo(self) -> None:
url.replace(DUMMY_RENAMED_OLD_MODEL_ID, DUMMY_RENAMED_NEW_MODEL_ID),
)

def test_get_hf_file_metadata_from_a_lfs_file(self) -> None:
    """Check the metadata returned for a file stored with LFS.

    The reported size must be the size of the LFS file itself, not the size
    of the pointer file.
    """
    lfs_url = hf_hub_url("gpt2", filename="tf_model.h5")
    lfs_metadata = get_hf_file_metadata(lfs_url)

    # Redirection: the resolved location is expected to contain "cdn-lfs".
    self.assertIn("cdn-lfs", lfs_metadata.location)
    # Size of the LFS file, not of the pointer.
    self.assertEqual(lfs_metadata.size, 497933648)


class StagingCachedDownloadTest(unittest.TestCase):
def test_download_from_a_gated_repo_with_hf_hub_download(self):
Expand Down

0 comments on commit b1a11c2

Please sign in to comment.