Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: auto retry download on network errors #405

Merged
merged 7 commits into from
Aug 31, 2023
Merged
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 37 additions & 13 deletions xinference/model/llm/llm_family.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,25 +226,49 @@ def cache_from_huggingface(
if not os.path.exists(cache_dir):
os.makedirs(cache_dir, exist_ok=True)

max_retry = 3
jiayini1119 marked this conversation as resolved.
Show resolved Hide resolved

if llm_spec.model_format == "pytorch":
assert isinstance(llm_spec, PytorchLLMSpecV1)
huggingface_hub.snapshot_download(
llm_spec.model_id,
revision=llm_spec.model_revision,
local_dir=cache_dir,
local_dir_use_symlinks=True,
)

for _ in range(max_retry):
try:
huggingface_hub.snapshot_download(
llm_spec.model_id,
revision=llm_spec.model_revision,
local_dir=cache_dir,
local_dir_use_symlinks=True,
)
break
except huggingface_hub.utils.LocalEntryNotFoundError:
pass
jiayini1119 marked this conversation as resolved.
Show resolved Hide resolved
except Exception as e:
logger.error(f"fail to download the model: {e}")
break
else:
logger.info("fail to launch model due to network error")

elif llm_spec.model_format == "ggmlv3":
assert isinstance(llm_spec, GgmlLLMSpecV1)
file_name = llm_spec.model_file_name_template.format(quantization=quantization)
huggingface_hub.hf_hub_download(
llm_spec.model_id,
revision=llm_spec.model_revision,
filename=file_name,
local_dir=cache_dir,
local_dir_use_symlinks=True,
)

for _ in range(max_retry):
try:
huggingface_hub.hf_hub_download(
llm_spec.model_id,
revision=llm_spec.model_revision,
filename=file_name,
local_dir=cache_dir,
local_dir_use_symlinks=True,
)
break
except huggingface_hub.utils.LocalEntryNotFoundError:
pass
jiayini1119 marked this conversation as resolved.
Show resolved Hide resolved
jiayini1119 marked this conversation as resolved.
Show resolved Hide resolved
except Exception as e:
logger.error(f"fail to download the model: {e}")
jiayini1119 marked this conversation as resolved.
Show resolved Hide resolved
break
else:
UranusSeven marked this conversation as resolved.
Show resolved Hide resolved
logger.info("fail to launch model due to network error")
return cache_dir


Expand Down