Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions src/huggingface_hub/hf_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3623,11 +3623,6 @@ def create_repo(
if provided_space_args:
warnings.warn(f"Ignoring provided {', '.join(provided_space_args)} because repo_type is not 'space'.")

if getattr(self, "_lfsmultipartthresh", None):
# Testing purposes only.
# See https://github.com/huggingface/huggingface_hub/pull/733/files#r820604472
json["lfsmultipartthresh"] = self._lfsmultipartthresh # type: ignore

if resource_group_id is not None:
json["resourceGroupId"] = resource_group_id

Expand Down
73 changes: 8 additions & 65 deletions tests/test_hf_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@

import pytest

import huggingface_hub.lfs
from huggingface_hub import HfApi, SpaceHardware, SpaceStage, SpaceStorage, constants
from huggingface_hub._commit_api import (
CommitOperationAdd,
Expand Down Expand Up @@ -2732,86 +2731,30 @@ def setup_local_clone(self) -> None:
subprocess.run(["git", "lfs", "track", "*.epub"], check=True, cwd=self.cache_dir)

@require_git_lfs
def test_end_to_end_thresh_6M(self):
# Little-hack: create repo with defined `_lfsmultipartthresh`. Only for tests purposes
self._api._lfsmultipartthresh = 6 * 10**6
def test_git_push_end_to_end(self):
self.repo_url = self._api.create_repo(repo_id=repo_name())
self.repo_id = self.repo_url.repo_id
self._api._lfsmultipartthresh = None
self.setup_local_clone()

subprocess.run(
["wget", LARGE_FILE_18MB], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=self.cache_dir
)
subprocess.run(["git", "add", "*"], check=True, cwd=self.cache_dir)
subprocess.run(["git", "commit", "-m", "commit message"], check=True, cwd=self.cache_dir)

# This will fail as we haven't set up our custom transfer agent yet.
failed_process = subprocess.run(
["git", "push"],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
cwd=self.cache_dir,
)
self.assertEqual(failed_process.returncode, 1)
self.assertIn('Run "hf lfs-enable-largefiles ./path/to/your/repo"', failed_process.stderr.decode())
# ^ Instructions on how to fix this are included in the error message.
subprocess.run(["hf", "lfs-enable-largefiles", self.cache_dir], check=True)

start_time = time.time()
subprocess.run(["git", "push"], check=True, cwd=self.cache_dir)
print("took", time.time() - start_time)

# To be 100% sure, let's download the resolved file
pdf_url = f"{self.repo_url}/resolve/main/progit.pdf"
DEST_FILENAME = "uploaded.pdf"
subprocess.run(
["wget", pdf_url, "-O", DEST_FILENAME],
check=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
cwd=self.cache_dir,
)
dest_filesize = (self.cache_dir / DEST_FILENAME).stat().st_size
assert dest_filesize == 18685041

@require_git_lfs
def test_end_to_end_thresh_16M(self):
# Here we'll push one multipart and one non-multipart file in the same commit, and see what happens
# Little-hack: create repo with defined `_lfsmultipartthresh`. Only for tests purposes
self._api._lfsmultipartthresh = 16 * 10**6
self.repo_url = self._api.create_repo(repo_id=repo_name())
self.repo_id = self.repo_url.repo_id
self._api._lfsmultipartthresh = None
self.setup_local_clone()

subprocess.run(
["wget", LARGE_FILE_18MB], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=self.cache_dir
)
subprocess.run(
["wget", LARGE_FILE_14MB], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=self.cache_dir
)
subprocess.run(["git", "add", "*"], check=True, cwd=self.cache_dir)
subprocess.run(["git", "commit", "-m", "both files in same commit"], check=True, cwd=self.cache_dir)
subprocess.run(["hf", "lfs-enable-largefiles", self.cache_dir], check=True)

start_time = time.time()
subprocess.run(["git", "push"], check=True, cwd=self.cache_dir)
print("took", time.time() - start_time)

def test_upload_lfs_file_multipart(self):
"""End to end test to check upload an LFS file using multipart upload works."""
self._api._lfsmultipartthresh = 16 * 10**6
self.repo_id = self._api.create_repo(repo_id=repo_name()).repo_id
self._api._lfsmultipartthresh = None

with patch.object(
huggingface_hub.lfs,
"_upload_parts_iteratively",
wraps=huggingface_hub.lfs._upload_parts_iteratively,
) as mock:
self._api.upload_file(repo_id=self.repo_id, path_or_fileobj=b"0" * 18 * 10**6, path_in_repo="lfs.bin")
mock.assert_called_once() # It used multipart upload
with SoftTemporaryDirectory() as tmp_dir:
filepath = hf_hub_download(
repo_id=self.repo_id,
filename="progit.pdf",
cache_dir=tmp_dir,
)
assert Path(filepath).stat().st_size == 18685041


class ParseHFUrlTest(unittest.TestCase):
Expand Down
Loading