3 changes: 3 additions & 0 deletions tests/test_regression.py
@@ -60,6 +60,9 @@ def test_model_from_modelscope(monkeypatch: pytest.MonkeyPatch):
     # model: https://modelscope.cn/models/qwen/Qwen1.5-0.5B-Chat/summary
     with monkeypatch.context() as m:
         m.setenv("VLLM_USE_MODELSCOPE", "True")
+        # Don't use HF_TOKEN for ModelScope repos, otherwise it will fail
+        # with 400 Client Error: Bad Request.
+        m.setenv("HF_TOKEN", "")
         llm = LLM(model="qwen/Qwen1.5-0.5B-Chat")

     prompts = [
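The new `m.setenv("HF_TOKEN", "")` line matters because ModelScope's hub rejects requests that carry a Hugging Face token (the 400 Bad Request noted in the comment), and CI machines often have `HF_TOKEN` exported. A minimal sketch of the isolation pattern, as a hypothetical standalone test (not from this PR):

import os

import pytest


def test_env_isolation(monkeypatch: pytest.MonkeyPatch):
    # Changes made through a context() block are rolled back on exit,
    # so the ambient HF_TOKEN is restored for the tests that follow.
    with monkeypatch.context() as m:
        m.setenv("HF_TOKEN", "")
        assert os.environ["HF_TOKEN"] == ""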
4 changes: 2 additions & 2 deletions vllm/model_executor/model_loader/default_loader.py
@@ -11,8 +11,8 @@
 from torch import nn
 from transformers.utils import SAFE_WEIGHTS_INDEX_NAME

+from vllm import envs
 from vllm.config import LoadConfig, LoadFormat, ModelConfig, VllmConfig
-from vllm.envs import VLLM_USE_MODELSCOPE
 from vllm.logger import init_logger
 from vllm.model_executor.model_loader.base_loader import BaseModelLoader
 from vllm.model_executor.model_loader.utils import (
@@ -64,7 +64,7 @@ def _maybe_download_from_modelscope(

     Returns the path to the downloaded model, or None if the model is not
     downloaded from ModelScope."""
-    if VLLM_USE_MODELSCOPE:
+    if envs.VLLM_USE_MODELSCOPE:
         # download model from ModelScope hub,
         # lazy import so that modelscope is not required for normal use.
         # pylint: disable=C.
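The mechanical change, repeated in every file below, is `from vllm import envs` plus an `envs.VLLM_USE_MODELSCOPE` attribute access instead of importing the constant. Importing the name directly freezes its value at import time; vLLM's `envs` module resolves attributes through a module-level `__getattr__` (PEP 562), so `envs.VLLM_USE_MODELSCOPE` re-reads the environment on each access, which is what lets the regression test flip the flag after vLLM is already imported. A minimal sketch of the lazy-lookup pattern, as a simplified stand-in for the real `vllm/envs.py`:

# envs.py -- simplified sketch, not vLLM's actual implementation
import os

_environment_variables = {
    "VLLM_USE_MODELSCOPE":
    lambda: os.environ.get("VLLM_USE_MODELSCOPE", "False").lower() == "true",
}


def __getattr__(name: str):
    # PEP 562: invoked for names not defined in the module, so every
    # `envs.VLLM_USE_MODELSCOPE` access re-reads os.environ.
    if name in _environment_variables:
        return _environment_variables[name]()
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

By contrast, `from envs import VLLM_USE_MODELSCOPE` would call `__getattr__` once at import time and bind the result permanently.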
4 changes: 2 additions & 2 deletions vllm/transformers_utils/__init__.py
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: Apache-2.0

-from vllm.envs import VLLM_USE_MODELSCOPE
+from vllm import envs

-if VLLM_USE_MODELSCOPE:
+if envs.VLLM_USE_MODELSCOPE:
     try:
         # Patch here, before each import happens
         import modelscope
21 changes: 10 additions & 11 deletions vllm/transformers_utils/config.py
@@ -24,7 +24,7 @@
                                           MODEL_FOR_CAUSAL_LM_MAPPING_NAMES)
 from transformers.utils import CONFIG_NAME as HF_CONFIG_NAME

-from vllm.envs import VLLM_USE_MODELSCOPE
+from vllm import envs
 from vllm.logger import init_logger
 # yapf conflicts with isort for this block
 # yapf: disable
@@ -45,13 +45,12 @@
 from vllm.transformers_utils.utils import check_gguf_file
 from vllm.utils import resolve_obj_by_qualname

-if VLLM_USE_MODELSCOPE:
+if envs.VLLM_USE_MODELSCOPE:
     from modelscope import AutoConfig
 else:
     from transformers import AutoConfig

 MISTRAL_CONFIG_NAME = "params.json"
-HF_TOKEN = os.getenv('HF_TOKEN', None)

 logger = init_logger(__name__)

@@ -130,7 +129,7 @@ def lookup_files() -> list[str]:
         ]
         # if model is remote, use hf_hub api to list files
         try:
-            if VLLM_USE_MODELSCOPE:
+            if envs.VLLM_USE_MODELSCOPE:
                 from vllm.transformers_utils.utils import (
                     modelscope_list_repo_files)
                 return modelscope_list_repo_files(repo_id,
@@ -185,7 +184,7 @@ def file_or_path_exists(model: Union[str, Path], config_name: str,
     return file_exists(str(model),
                        config_name,
                        revision=revision,
-                       token=HF_TOKEN)
+                       token=os.getenv('HF_TOKEN', None))


 def patch_rope_scaling(config: PretrainedConfig) -> None:
@@ -312,7 +311,7 @@ def get_config(
             model,
             revision=revision,
             code_revision=code_revision,
-            token=HF_TOKEN,
+            token=os.getenv('HF_TOKEN', None),
             **kwargs,
         )

@@ -324,7 +323,7 @@ def get_config(
             model,
             revision=revision,
             code_revision=code_revision,
-            token=HF_TOKEN,
+            token=os.getenv('HF_TOKEN', None),
             **kwargs,
         )
     else:
@@ -334,7 +333,7 @@ def get_config(
             trust_remote_code=trust_remote_code,
             revision=revision,
             code_revision=code_revision,
-            token=HF_TOKEN,
+            token=os.getenv('HF_TOKEN', None),
             **kwargs,
         )
     except ValueError as e:
@@ -352,7 +351,7 @@ def get_config(
         raise e

     elif config_format == ConfigFormat.MISTRAL:
-        config = load_params_config(model, revision, token=HF_TOKEN, **kwargs)
+        config = load_params_config(model, revision, **kwargs)
     else:
         supported_formats = [
             fmt.value for fmt in ConfigFormat if fmt != ConfigFormat.AUTO
@@ -561,7 +560,7 @@ def get_sentence_transformer_tokenizer_config(model: str,
             # If model is on HuggingfaceHub, get the repo files
             repo_files = list_repo_files(model,
                                          revision=revision,
-                                         token=HF_TOKEN)
+                                         token=os.getenv('HF_TOKEN', None))
         except Exception:
             repo_files = []

@@ -768,7 +767,7 @@ def get_hf_image_processor_config(
     **kwargs,
 ) -> dict[str, Any]:
     # ModelScope does not provide an interface for image_processor
-    if VLLM_USE_MODELSCOPE:
+    if envs.VLLM_USE_MODELSCOPE:
         return dict()
     # Separate model folder from file path for GGUF models
     if check_gguf_file(model):
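The `HF_TOKEN` change in this file follows the same principle: the module-level `HF_TOKEN = os.getenv('HF_TOKEN', None)` snapshot is replaced with a fresh `os.getenv('HF_TOKEN', None)` at each call site, so a token set or cleared after the module is imported (as the regression test does via monkeypatch) is actually honored. Note also that the MISTRAL branch drops the explicit token argument to `load_params_config` entirely. A standard-library-only sketch of the snapshot-versus-call-time difference:

import os

os.environ.pop("HF_TOKEN", None)
TOKEN_AT_IMPORT = os.getenv("HF_TOKEN", None)  # snapshot: None, frozen


def token_at_call():
    # Re-read on every call, so later environment changes are honored.
    return os.getenv("HF_TOKEN", None)


os.environ["HF_TOKEN"] = ""  # e.g. cleared by the regression test
print(TOKEN_AT_IMPORT)  # None -- the stale snapshot
print(token_at_call())  # '' -- the current value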
4 changes: 2 additions & 2 deletions vllm/transformers_utils/tokenizer.py
@@ -13,7 +13,7 @@
 from transformers import (AutoTokenizer, PreTrainedTokenizer,
                           PreTrainedTokenizerFast)

-from vllm.envs import VLLM_USE_MODELSCOPE
+from vllm import envs
 from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
 from vllm.transformers_utils.tokenizer_base import (TokenizerBase,
@@ -168,7 +168,7 @@ def get_tokenizer(
 ) -> AnyTokenizer:
     """Gets a tokenizer for the given model name via HuggingFace or ModelScope.
     """
-    if VLLM_USE_MODELSCOPE:
+    if envs.VLLM_USE_MODELSCOPE:
         # download model from ModelScope hub,
         # lazy import so that modelscope is not required for normal use.
         # pylint: disable=C.
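Taken together, a hypothetical end-to-end use of the patched behavior (model name taken from the regression test; the variables are still set before importing vllm, since vllm/transformers_utils patches modelscope at import time):

import os

os.environ["VLLM_USE_MODELSCOPE"] = "True"
os.environ["HF_TOKEN"] = ""  # avoid the 400 Bad Request against ModelScope

from vllm import LLM  # noqa: E402

llm = LLM(model="qwen/Qwen1.5-0.5B-Chat")
print(llm.generate(["Hello"]))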