From 9a61e639cce340d325db2225385c4ebf40b1acea Mon Sep 17 00:00:00 2001
From: Linkun
Date: Tue, 13 May 2025 12:41:23 -0700
Subject: [PATCH 1/5] move model_config after *

Signed-off-by: Linkun
---
 tests/entrypoints/test_chat_utils.py      |  6 +++---
 vllm/entrypoints/chat_utils.py            | 19 ++++++++++++++-----
 vllm/entrypoints/llm.py                   |  2 +-
 vllm/entrypoints/openai/serving_engine.py |  2 +-
 4 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py
index bcb25ed99062..6f48f925dad5 100644
--- a/tests/entrypoints/test_chat_utils.py
+++ b/tests/entrypoints/test_chat_utils.py
@@ -903,11 +903,11 @@ def test_resolve_content_format_hf_defined(model, expected_format):
     print(_try_extract_ast(chat_template))
 
     resolved_format = resolve_chat_template_content_format(
-        model_config,
         None,  # Test detecting the tokenizer's chat_template
         None,
         "auto",
         tokenizer,
+        model_config=model_config,
     )
 
     assert resolved_format == expected_format
@@ -962,11 +962,11 @@ def test_resolve_content_format_fallbacks(model, expected_format):
     print(_try_extract_ast(chat_template))
 
     resolved_format = resolve_chat_template_content_format(
-        model_config,
         None,  # Test detecting the tokenizer's chat_template
         None,
         "auto",
         tokenizer,
+        model_config=model_config,
     )
 
     assert resolved_format == expected_format
@@ -1021,11 +1021,11 @@ def test_resolve_content_format_examples(template_path, expected_format):
     print(_try_extract_ast(chat_template))
 
     resolved_format = resolve_chat_template_content_format(
-        model_config,
         chat_template,
         None,
         "auto",
         dummy_tokenizer,
+        model_config=model_config,
     )
 
     assert resolved_format == expected_format
diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index 183b5bf68311..05d68c61363a 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -330,10 +330,13 @@ def resolve_mistral_chat_template(
         return None
 
 def resolve_hf_chat_template(
-    model_config: ModelConfig,
     tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
     chat_template: Optional[str],
     tools: Optional[list[dict[str, Any]]],
+    *,
+    model_config: ModelConfig,
+    # For backwards compatibility, keep deprecated args as kwargs
+    **kwargs: dict[str, Any],
 ) -> Optional[str]:
     # 1st priority: The given chat template
     if chat_template is not None:
@@ -379,18 +382,21 @@ def resolve_hf_chat_template(
 
 
 def _resolve_chat_template_content_format(
-    model_config: ModelConfig,
     chat_template: Optional[str],
     tools: Optional[list[dict[str, Any]]],
     given_format: ChatTemplateContentFormatOption,
     tokenizer: AnyTokenizer,
+    *,
+    model_config: ModelConfig,
+    # For backwards compatibility, keep deprecated args as kwargs
+    **kwargs: dict[str, Any],
 ) -> _ChatTemplateContentFormat:
     if isinstance(tokenizer, (PreTrainedTokenizer, PreTrainedTokenizerFast)):
         hf_chat_template = resolve_hf_chat_template(
-            model_config,
             tokenizer,
             chat_template=chat_template,
             tools=tools,
+            model_config=model_config,
         )
     else:
         hf_chat_template = None
@@ -429,18 +435,21 @@ def _log_chat_template_content_format(
 
 
 def resolve_chat_template_content_format(
-    model_config: ModelConfig,
     chat_template: Optional[str],
     tools: Optional[list[dict[str, Any]]],
     given_format: ChatTemplateContentFormatOption,
     tokenizer: AnyTokenizer,
+    *,
+    model_config: ModelConfig,
+    # For backwards compatibility, keep deprecated args as kwargs
+    **kwargs: dict[str, Any],
 ) -> _ChatTemplateContentFormat:
     detected_format = _resolve_chat_template_content_format(
-        model_config,
         chat_template,
         tools,
         given_format,
         tokenizer,
+        model_config=model_config,
     )
 
     _log_chat_template_content_format(
diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py
index cebddcc8e6aa..60b0a11995a6 100644
--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -731,11 +731,11 @@ def chat(
         tokenizer = self.get_tokenizer(lora_request)
         model_config = self.llm_engine.get_model_config()
         resolved_content_format = resolve_chat_template_content_format(
-            model_config,
             chat_template,
             tools,
             chat_template_content_format,
             tokenizer,
+            model_config=model_config,
         )
 
         _chat_template_kwargs: dict[str, Any] = dict(
diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py
index f1d907f519c5..7da2e8426144 100644
--- a/vllm/entrypoints/openai/serving_engine.py
+++ b/vllm/entrypoints/openai/serving_engine.py
@@ -670,11 +670,11 @@ async def _preprocess_chat(
         model_config = self.model_config
 
         resolved_content_format = resolve_chat_template_content_format(
-            model_config,
             chat_template,
             tool_dicts,
             chat_template_content_format,
             tokenizer,
+            model_config=model_config,
         )
         conversation, mm_data_future = parse_chat_messages_futures(
             messages,

From 06ff8341e635e54ca08fc721b7fbba6ab8441db7 Mon Sep 17 00:00:00 2001
From: Linkun
Date: Tue, 13 May 2025 13:04:55 -0700
Subject: [PATCH 2/5] pass model_config as kwarg

Signed-off-by: Linkun
---
 vllm/entrypoints/chat_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index 05d68c61363a..d801d0b07f93 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -1211,10 +1211,10 @@ def apply_hf_chat_template(
     **kwargs: Any,
 ) -> str:
     hf_chat_template = resolve_hf_chat_template(
-        model_config,
         tokenizer,
         chat_template=chat_template,
         tools=tools,
+        model_config=model_config,
     )
 
     if hf_chat_template is None:

From 5de2f7f167c82599c4ca04a7d4640530eb38a028 Mon Sep 17 00:00:00 2001
From: Linkun
Date: Tue, 13 May 2025 13:14:58 -0700
Subject: [PATCH 3/5] fix call order in api_server

Signed-off-by: Linkun
---
 vllm/entrypoints/openai/api_server.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index a954a9ff90bc..5b3df0faccf6 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -971,10 +971,10 @@ async def init_app_state(
                 chat_template=resolved_chat_template)
         else:
             hf_chat_template = resolve_hf_chat_template(
-                vllm_config.model_config,
-                tokenizer,
+                tokenizer=tokenizer,
                 chat_template=None,
                 tools=None,
+                model_config=vllm_config.model_config,
             )
 
             if hf_chat_template != resolved_chat_template:

From a8039cfc98e4dbb8bae86120814fadf2ae938484 Mon Sep 17 00:00:00 2001
From: Linkun
Date: Tue, 13 May 2025 13:51:40 -0700
Subject: [PATCH 4/5] also fix apply_hf_chat_template

Signed-off-by: Linkun
---
 tests/entrypoints/openai/test_chat_template.py | 4 ++--
 tests/entrypoints/test_chat_utils.py           | 4 ++--
 vllm/entrypoints/chat_utils.py                 | 2 +-
 vllm/entrypoints/llm.py                        | 4 ++--
 vllm/entrypoints/openai/serving_engine.py      | 4 ++--
 5 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/tests/entrypoints/openai/test_chat_template.py b/tests/entrypoints/openai/test_chat_template.py
index 48ede50e98f7..f18fbb0a9c71 100644
--- a/tests/entrypoints/openai/test_chat_template.py
+++ b/tests/entrypoints/openai/test_chat_template.py
@@ -122,10 +122,10 @@ def test_get_gen_prompt(model, template, add_generation_prompt,
 
     # Call the function and get the result
     result = apply_hf_chat_template(
-        model_config,
-        tokenizer,
+        tokenizer=tokenizer,
         conversation=mock_request.messages,
         chat_template=mock_request.chat_template or template_content,
+        model_config=model_config,
         tools=None,
         add_generation_prompt=mock_request.add_generation_prompt,
         continue_final_message=mock_request.continue_final_message,
diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py
index 6f48f925dad5..43ad091971a7 100644
--- a/tests/entrypoints/test_chat_utils.py
+++ b/tests/entrypoints/test_chat_utils.py
@@ -793,10 +793,10 @@ def get_conversation(is_hf: bool):
     )
 
     vllm_result = apply_hf_chat_template(
-        model_config,
-        tokenizer,
+        tokenizer=tokenizer,
         conversation=conversation,
         chat_template=None,
+        model_config=model_config,
         tools=None,
         add_generation_prompt=True,
     )
diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index d801d0b07f93..b098875af14a 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -1201,12 +1201,12 @@ def parse_chat_messages_futures(
 
 
 def apply_hf_chat_template(
-    model_config: ModelConfig,
     tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
     conversation: list[ConversationMessage],
     chat_template: Optional[str],
     tools: Optional[list[dict[str, Any]]],
     *,
+    model_config: ModelConfig,
     tokenize: bool = False,  # Different from HF's default
     **kwargs: Any,
 ) -> str:
diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py
index 60b0a11995a6..053ee55bb6a8 100644
--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -767,9 +767,9 @@ def chat(
             )
         else:
             prompt_str = apply_hf_chat_template(
-                model_config,
-                tokenizer,
+                tokenizer=tokenizer,
                 conversation=conversation,
+                model_config=model_config,
                 **_chat_template_kwargs,
             )
             # Special tokens are already included in chat templates so
diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py
index 7da2e8426144..f9eebde37181 100644
--- a/vllm/entrypoints/openai/serving_engine.py
+++ b/vllm/entrypoints/openai/serving_engine.py
@@ -701,9 +701,9 @@ async def _preprocess_chat(
             )
         else:
             request_prompt = apply_hf_chat_template(
-                model_config,
-                tokenizer,
+                tokenizer=tokenizer,
                 conversation=conversation,
+                model_config=model_config,
                 **_chat_template_kwargs,
             )
 

From 477cd80544dba9bd4387bc106d62574d016a3857 Mon Sep 17 00:00:00 2001
From: Linkun
Date: Tue, 13 May 2025 20:20:29 -0700
Subject: [PATCH 5/5] deprecate_kwargs

Signed-off-by: Linkun
---
 vllm/entrypoints/chat_utils.py | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index b098875af14a..6f5514a6f801 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -44,7 +44,7 @@
 # yapf: enable
 from vllm.transformers_utils.processor import cached_get_processor
 from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer
-from vllm.utils import random_uuid
+from vllm.utils import deprecate_kwargs, random_uuid
 
 logger = init_logger(__name__)
 
@@ -329,14 +329,17 @@ def resolve_mistral_chat_template(
             "so it will be ignored.")
         return None
 
+@deprecate_kwargs(
+    "trust_remote_code",
+    additional_message="Please use `model_config.trust_remote_code` instead.",
+)
 def resolve_hf_chat_template(
     tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
     chat_template: Optional[str],
     tools: Optional[list[dict[str, Any]]],
     *,
     model_config: ModelConfig,
-    # For backwards compatibility, keep deprecated args as kwargs
-    **kwargs: dict[str, Any],
+    trust_remote_code: Optional[bool] = None,
 ) -> Optional[str]:
     # 1st priority: The given chat template
     if chat_template is not None:
@@ -388,8 +391,6 @@ def _resolve_chat_template_content_format(
     tokenizer: AnyTokenizer,
     *,
     model_config: ModelConfig,
-    # For backwards compatibility, keep deprecated args as kwargs
-    **kwargs: dict[str, Any],
 ) -> _ChatTemplateContentFormat:
     if isinstance(tokenizer, (PreTrainedTokenizer, PreTrainedTokenizerFast)):
         hf_chat_template = resolve_hf_chat_template(
@@ -434,6 +435,10 @@ def _log_chat_template_content_format(
     )
 
 
+@deprecate_kwargs(
+    "trust_remote_code",
+    additional_message="Please use `model_config.trust_remote_code` instead.",
+)
 def resolve_chat_template_content_format(
     chat_template: Optional[str],
     tools: Optional[list[dict[str, Any]]],
@@ -441,8 +446,7 @@ def resolve_chat_template_content_format(
     tokenizer: AnyTokenizer,
     *,
     model_config: ModelConfig,
-    # For backwards compatibility, keep deprecated args as kwargs
-    **kwargs: dict[str, Any],
+    trust_remote_code: Optional[bool] = None,
 ) -> _ChatTemplateContentFormat:
     detected_format = _resolve_chat_template_content_format(
         chat_template,
@@ -1200,6 +1204,10 @@ def parse_chat_messages_futures(
     return conversation, mm_tracker.all_mm_data()
 
 
+@deprecate_kwargs(
+    "trust_remote_code",
+    additional_message="Please use `model_config.trust_remote_code` instead.",
+)
 def apply_hf_chat_template(
     tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
     conversation: list[ConversationMessage],
@@ -1208,6 +1216,8 @@ def apply_hf_chat_template(
     *,
     model_config: ModelConfig,
     tokenize: bool = False,  # Different from HF's default
+    # Deprecated, explicitly capture here so it doesn't slip into kwargs.
+    trust_remote_code: Optional[bool] = None,
     **kwargs: Any,
 ) -> str:
     hf_chat_template = resolve_hf_chat_template(
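
Note on the pattern used by this series (editor's illustration, not part of the patches): the bare `*` makes `model_config` keyword-only, and a `deprecate_kwargs`-style decorator warns when a retired keyword such as `trust_remote_code` is still supplied. The sketch below is a minimal stand-alone approximation for illustration; it is not vLLM's actual `vllm.utils.deprecate_kwargs`, and the stand-in types for `tokenizer` and `model_config` are placeholders.

# Illustrative sketch only; not the real vllm.utils.deprecate_kwargs.
import warnings
from functools import wraps
from typing import Any, Callable, Optional


def deprecate_kwargs(*deprecated: str, additional_message: str = ""):
    """Warn when any of the named keyword arguments is still passed."""

    def decorator(func: Callable[..., Any]) -> Callable[..., Any]:

        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            for name in deprecated:
                # Warn only when the caller actually supplied a value.
                if kwargs.get(name) is not None:
                    warnings.warn(
                        f"`{name}` is deprecated. {additional_message}",
                        DeprecationWarning,
                        stacklevel=2,
                    )
            return func(*args, **kwargs)

        return wrapper

    return decorator


@deprecate_kwargs(
    "trust_remote_code",
    additional_message="Please use `model_config.trust_remote_code` instead.",
)
def resolve_hf_chat_template(
    tokenizer: str,  # stand-in type for the example
    chat_template: Optional[str],
    tools: Optional[list],
    *,
    model_config: dict,  # stand-in for ModelConfig
    trust_remote_code: Optional[bool] = None,
) -> Optional[str]:
    # Toy body: just echo the template back.
    return chat_template


# New-style call: model_config must be passed by keyword.
resolve_hf_chat_template("tok", None, None, model_config={"trust_remote_code": True})

# The retired kwarg still works but emits a DeprecationWarning.
resolve_hf_chat_template("tok", None, None, model_config={}, trust_remote_code=True)

Because `model_config` sits after `*`, any caller that still passes it positionally now fails with a TypeError, which is why every call site in the series is switched to `model_config=model_config`.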