diff --git a/tests/test_regression.py b/tests/test_regression.py
index 8c9d4a91c73b..e092945422ed 100644
--- a/tests/test_regression.py
+++ b/tests/test_regression.py
@@ -60,6 +60,9 @@ def test_model_from_modelscope(monkeypatch: pytest.MonkeyPatch):
     # model: https://modelscope.cn/models/qwen/Qwen1.5-0.5B-Chat/summary
     with monkeypatch.context() as m:
         m.setenv("VLLM_USE_MODELSCOPE", "True")
+        # Don't use HF_TOKEN for ModelScope repos, otherwise it will fail
+        # with 400 Client Error: Bad Request.
+        m.setenv("HF_TOKEN", "")
         llm = LLM(model="qwen/Qwen1.5-0.5B-Chat")
 
         prompts = [
diff --git a/vllm/model_executor/model_loader/default_loader.py b/vllm/model_executor/model_loader/default_loader.py
index ddbd60940e9e..29a6e0af4bc6 100644
--- a/vllm/model_executor/model_loader/default_loader.py
+++ b/vllm/model_executor/model_loader/default_loader.py
@@ -11,8 +11,8 @@
 from torch import nn
 from transformers.utils import SAFE_WEIGHTS_INDEX_NAME
 
+from vllm import envs
 from vllm.config import LoadConfig, LoadFormat, ModelConfig, VllmConfig
-from vllm.envs import VLLM_USE_MODELSCOPE
 from vllm.logger import init_logger
 from vllm.model_executor.model_loader.base_loader import BaseModelLoader
 from vllm.model_executor.model_loader.utils import (
@@ -64,7 +64,7 @@ def _maybe_download_from_modelscope(
 
         Returns the path to the downloaded model, or None if the model is not
         downloaded from ModelScope."""
-        if VLLM_USE_MODELSCOPE:
+        if envs.VLLM_USE_MODELSCOPE:
             # download model from ModelScope hub,
             # lazy import so that modelscope is not required for normal use.
             # pylint: disable=C.
diff --git a/vllm/transformers_utils/__init__.py b/vllm/transformers_utils/__init__.py
index b556976a51ba..84bd7a747656 100644
--- a/vllm/transformers_utils/__init__.py
+++ b/vllm/transformers_utils/__init__.py
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: Apache-2.0
 
-from vllm.envs import VLLM_USE_MODELSCOPE
+from vllm import envs
 
-if VLLM_USE_MODELSCOPE:
+if envs.VLLM_USE_MODELSCOPE:
     try:
         # Patch here, before each import happens
         import modelscope
diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index 2a2a8c181874..69e7207cc350 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -24,7 +24,7 @@
                                   MODEL_FOR_CAUSAL_LM_MAPPING_NAMES)
 from transformers.utils import CONFIG_NAME as HF_CONFIG_NAME
 
-from vllm.envs import VLLM_USE_MODELSCOPE
+from vllm import envs
 from vllm.logger import init_logger
 # yapf conflicts with isort for this block
 # yapf: disable
@@ -45,13 +45,12 @@
 from vllm.transformers_utils.utils import check_gguf_file
 from vllm.utils import resolve_obj_by_qualname
 
-if VLLM_USE_MODELSCOPE:
+if envs.VLLM_USE_MODELSCOPE:
     from modelscope import AutoConfig
 else:
     from transformers import AutoConfig
 
 MISTRAL_CONFIG_NAME = "params.json"
-HF_TOKEN = os.getenv('HF_TOKEN', None)
 
 logger = init_logger(__name__)
 
@@ -130,7 +129,7 @@ def lookup_files() -> list[str]:
         ]
         # if model is remote, use hf_hub api to list files
         try:
-            if VLLM_USE_MODELSCOPE:
+            if envs.VLLM_USE_MODELSCOPE:
                 from vllm.transformers_utils.utils import (
                     modelscope_list_repo_files)
                 return modelscope_list_repo_files(repo_id,
@@ -185,7 +184,7 @@ def file_or_path_exists(model: Union[str, Path], config_name: str,
     return file_exists(str(model),
                        config_name,
                        revision=revision,
-                       token=HF_TOKEN)
+                       token=os.getenv('HF_TOKEN', None))
 
 
 def patch_rope_scaling(config: PretrainedConfig) -> None:
@@ -312,7 +311,7 @@ def get_config(
             model,
             revision=revision,
             code_revision=code_revision,
-            token=HF_TOKEN,
+            token=os.getenv('HF_TOKEN', None),
             **kwargs,
         )
 
@@ -324,7 +323,7 @@ def get_config(
                 model,
                 revision=revision,
                 code_revision=code_revision,
-                token=HF_TOKEN,
+                token=os.getenv('HF_TOKEN', None),
                 **kwargs,
             )
         else:
@@ -334,7 +333,7 @@ def get_config(
                     trust_remote_code=trust_remote_code,
                     revision=revision,
                     code_revision=code_revision,
-                    token=HF_TOKEN,
+                    token=os.getenv('HF_TOKEN', None),
                     **kwargs,
                 )
             except ValueError as e:
@@ -352,7 +351,7 @@ def get_config(
                     raise e
 
     elif config_format == ConfigFormat.MISTRAL:
-        config = load_params_config(model, revision, token=HF_TOKEN, **kwargs)
+        config = load_params_config(model, revision, **kwargs)
     else:
         supported_formats = [
             fmt.value for fmt in ConfigFormat if fmt != ConfigFormat.AUTO
@@ -561,7 +560,7 @@ def get_sentence_transformer_tokenizer_config(model: str,
             # If model is on HuggingfaceHub, get the repo files
             repo_files = list_repo_files(model,
                                          revision=revision,
-                                         token=HF_TOKEN)
+                                         token=os.getenv('HF_TOKEN', None))
         except Exception:
             repo_files = []
 
@@ -768,7 +767,7 @@ def get_hf_image_processor_config(
     **kwargs,
 ) -> dict[str, Any]:
     # ModelScope does not provide an interface for image_processor
-    if VLLM_USE_MODELSCOPE:
+    if envs.VLLM_USE_MODELSCOPE:
         return dict()
     # Separate model folder from file path for GGUF models
     if check_gguf_file(model):
diff --git a/vllm/transformers_utils/tokenizer.py b/vllm/transformers_utils/tokenizer.py
index e31580ede57b..fa7a208c48ed 100644
--- a/vllm/transformers_utils/tokenizer.py
+++ b/vllm/transformers_utils/tokenizer.py
@@ -13,7 +13,7 @@
 from transformers import (AutoTokenizer, PreTrainedTokenizer,
                           PreTrainedTokenizerFast)
 
-from vllm.envs import VLLM_USE_MODELSCOPE
+from vllm import envs
 from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
 from vllm.transformers_utils.tokenizer_base import (TokenizerBase,
@@ -168,7 +168,7 @@ def get_tokenizer(
 ) -> AnyTokenizer:
     """Gets a tokenizer for the given model name via HuggingFace or ModelScope.
     """
-    if VLLM_USE_MODELSCOPE:
+    if envs.VLLM_USE_MODELSCOPE:
         # download model from ModelScope hub,
         # lazy import so that modelscope is not required for normal use.
         # pylint: disable=C.