From 9a61e639cce340d325db2225385c4ebf40b1acea Mon Sep 17 00:00:00 2001
From: Linkun
Date: Tue, 13 May 2025 12:41:23 -0700
Subject: [PATCH 1/5] move model_config after *

Signed-off-by: Linkun
---
 tests/entrypoints/test_chat_utils.py      |  6 +++---
 vllm/entrypoints/chat_utils.py            | 19 ++++++++++++++-----
 vllm/entrypoints/llm.py                   |  2 +-
 vllm/entrypoints/openai/serving_engine.py |  2 +-
 4 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py
index bcb25ed99062..6f48f925dad5 100644
--- a/tests/entrypoints/test_chat_utils.py
+++ b/tests/entrypoints/test_chat_utils.py
@@ -903,11 +903,11 @@ def test_resolve_content_format_hf_defined(model, expected_format):
     print(_try_extract_ast(chat_template))
 
     resolved_format = resolve_chat_template_content_format(
-        model_config,
         None,  # Test detecting the tokenizer's chat_template
         None,
         "auto",
         tokenizer,
+        model_config=model_config,
     )
 
     assert resolved_format == expected_format
@@ -962,11 +962,11 @@ def test_resolve_content_format_fallbacks(model, expected_format):
     print(_try_extract_ast(chat_template))
 
     resolved_format = resolve_chat_template_content_format(
-        model_config,
         None,  # Test detecting the tokenizer's chat_template
         None,
         "auto",
         tokenizer,
+        model_config=model_config,
     )
 
     assert resolved_format == expected_format
@@ -1021,11 +1021,11 @@ def test_resolve_content_format_examples(template_path, expected_format):
     print(_try_extract_ast(chat_template))
 
     resolved_format = resolve_chat_template_content_format(
-        model_config,
         chat_template,
         None,
         "auto",
         dummy_tokenizer,
+        model_config=model_config,
     )
 
     assert resolved_format == expected_format
diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index 183b5bf68311..05d68c61363a 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -330,10 +330,13 @@ def resolve_mistral_chat_template(
         return None
 
 def resolve_hf_chat_template(
-    model_config: ModelConfig,
     tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
     chat_template: Optional[str],
     tools: Optional[list[dict[str, Any]]],
+    *,
+    model_config: ModelConfig,
+    # For backwards compatibility, keep deprecated args as kwargs
+    **kwargs: dict[str, Any],
 ) -> Optional[str]:
     # 1st priority: The given chat template
     if chat_template is not None:
@@ -379,18 +382,21 @@ def resolve_hf_chat_template(
 
 
 def _resolve_chat_template_content_format(
-    model_config: ModelConfig,
     chat_template: Optional[str],
     tools: Optional[list[dict[str, Any]]],
     given_format: ChatTemplateContentFormatOption,
     tokenizer: AnyTokenizer,
+    *,
+    model_config: ModelConfig,
+    # For backwards compatibility, keep deprecated args as kwargs
+    **kwargs: dict[str, Any],
 ) -> _ChatTemplateContentFormat:
     if isinstance(tokenizer, (PreTrainedTokenizer, PreTrainedTokenizerFast)):
         hf_chat_template = resolve_hf_chat_template(
-            model_config,
             tokenizer,
             chat_template=chat_template,
             tools=tools,
+            model_config=model_config,
         )
     else:
         hf_chat_template = None
@@ -429,18 +435,21 @@ def _log_chat_template_content_format(
 
 
 def resolve_chat_template_content_format(
-    model_config: ModelConfig,
     chat_template: Optional[str],
     tools: Optional[list[dict[str, Any]]],
     given_format: ChatTemplateContentFormatOption,
     tokenizer: AnyTokenizer,
+    *,
+    model_config: ModelConfig,
+    # For backwards compatibility, keep deprecated args as kwargs
+    **kwargs: dict[str, Any],
 ) -> _ChatTemplateContentFormat:
     detected_format = _resolve_chat_template_content_format(
-        model_config,
         chat_template,
         tools,
         given_format,
         tokenizer,
+        model_config=model_config,
     )
 
     _log_chat_template_content_format(
diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py
index cebddcc8e6aa..60b0a11995a6 100644
--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -731,11 +731,11 @@ def chat(
         tokenizer = self.get_tokenizer(lora_request)
         model_config = self.llm_engine.get_model_config()
         resolved_content_format = resolve_chat_template_content_format(
-            model_config,
             chat_template,
             tools,
             chat_template_content_format,
             tokenizer,
+            model_config=model_config,
         )
 
         _chat_template_kwargs: dict[str, Any] = dict(
diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py
index f1d907f519c5..7da2e8426144 100644
--- a/vllm/entrypoints/openai/serving_engine.py
+++ b/vllm/entrypoints/openai/serving_engine.py
@@ -670,11 +670,11 @@ async def _preprocess_chat(
         model_config = self.model_config
 
         resolved_content_format = resolve_chat_template_content_format(
-            model_config,
             chat_template,
             tool_dicts,
             chat_template_content_format,
             tokenizer,
+            model_config=model_config,
         )
         conversation, mm_data_future = parse_chat_messages_futures(
             messages,

From 06ff8341e635e54ca08fc721b7fbba6ab8441db7 Mon Sep 17 00:00:00 2001
From: Linkun
Date: Tue, 13 May 2025 13:04:55 -0700
Subject: [PATCH 2/5] pass model_config as kwarg

Signed-off-by: Linkun
---
 vllm/entrypoints/chat_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index 05d68c61363a..d801d0b07f93 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -1211,10 +1211,10 @@ def apply_hf_chat_template(
     **kwargs: Any,
 ) -> str:
     hf_chat_template = resolve_hf_chat_template(
-        model_config,
         tokenizer,
         chat_template=chat_template,
         tools=tools,
+        model_config=model_config,
     )
 
     if hf_chat_template is None:

From 5de2f7f167c82599c4ca04a7d4640530eb38a028 Mon Sep 17 00:00:00 2001
From: Linkun
Date: Tue, 13 May 2025 13:14:58 -0700
Subject: [PATCH 3/5] fix call order in api_server

Signed-off-by: Linkun
---
 vllm/entrypoints/openai/api_server.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index a954a9ff90bc..5b3df0faccf6 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -971,10 +971,10 @@ async def init_app_state(
                 chat_template=resolved_chat_template)
         else:
             hf_chat_template = resolve_hf_chat_template(
-                vllm_config.model_config,
-                tokenizer,
+                tokenizer=tokenizer,
                 chat_template=None,
                 tools=None,
+                model_config=vllm_config.model_config,
             )
 
             if hf_chat_template != resolved_chat_template:

From a8039cfc98e4dbb8bae86120814fadf2ae938484 Mon Sep 17 00:00:00 2001
From: Linkun
Date: Tue, 13 May 2025 13:51:40 -0700
Subject: [PATCH 4/5] also fix apply_hf_chat_template

Signed-off-by: Linkun
---
 tests/entrypoints/openai/test_chat_template.py | 4 ++--
 tests/entrypoints/test_chat_utils.py           | 4 ++--
 vllm/entrypoints/chat_utils.py                 | 2 +-
 vllm/entrypoints/llm.py                        | 4 ++--
 vllm/entrypoints/openai/serving_engine.py      | 4 ++--
 5 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/tests/entrypoints/openai/test_chat_template.py b/tests/entrypoints/openai/test_chat_template.py
index 48ede50e98f7..f18fbb0a9c71 100644
--- a/tests/entrypoints/openai/test_chat_template.py
+++ b/tests/entrypoints/openai/test_chat_template.py
@@ -122,10 +122,10 @@ def test_get_gen_prompt(model, template, add_generation_prompt,
 
     # Call the function and get the result
     result = apply_hf_chat_template(
-        model_config,
-        tokenizer,
+        tokenizer=tokenizer,
         conversation=mock_request.messages,
         chat_template=mock_request.chat_template or template_content,
+        model_config=model_config,
         tools=None,
         add_generation_prompt=mock_request.add_generation_prompt,
         continue_final_message=mock_request.continue_final_message,
diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py
index 6f48f925dad5..43ad091971a7 100644
--- a/tests/entrypoints/test_chat_utils.py
+++ b/tests/entrypoints/test_chat_utils.py
@@ -793,10 +793,10 @@ def get_conversation(is_hf: bool):
     )
 
     vllm_result = apply_hf_chat_template(
-        model_config,
-        tokenizer,
+        tokenizer=tokenizer,
         conversation=conversation,
         chat_template=None,
+        model_config=model_config,
         tools=None,
         add_generation_prompt=True,
     )
diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index d801d0b07f93..b098875af14a 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -1201,12 +1201,12 @@ def parse_chat_messages_futures(
 
 
 def apply_hf_chat_template(
-    model_config: ModelConfig,
     tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
     conversation: list[ConversationMessage],
     chat_template: Optional[str],
     tools: Optional[list[dict[str, Any]]],
     *,
+    model_config: ModelConfig,
     tokenize: bool = False,  # Different from HF's default
     **kwargs: Any,
 ) -> str:
diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py
index 60b0a11995a6..053ee55bb6a8 100644
--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -767,9 +767,9 @@ def chat(
             )
         else:
             prompt_str = apply_hf_chat_template(
-                model_config,
-                tokenizer,
+                tokenizer=tokenizer,
                 conversation=conversation,
+                model_config=model_config,
                 **_chat_template_kwargs,
             )
             # Special tokens are already included in chat templates so
diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py
index 7da2e8426144..f9eebde37181 100644
--- a/vllm/entrypoints/openai/serving_engine.py
+++ b/vllm/entrypoints/openai/serving_engine.py
@@ -701,9 +701,9 @@ async def _preprocess_chat(
             )
         else:
             request_prompt = apply_hf_chat_template(
-                model_config,
-                tokenizer,
+                tokenizer=tokenizer,
                 conversation=conversation,
+                model_config=model_config,
                 **_chat_template_kwargs,
             )
 

From 477cd80544dba9bd4387bc106d62574d016a3857 Mon Sep 17 00:00:00 2001
From: Linkun
Date: Tue, 13 May 2025 20:20:29 -0700
Subject: [PATCH 5/5] deprecate_kwargs

Signed-off-by: Linkun
---
 vllm/entrypoints/chat_utils.py | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index b098875af14a..6f5514a6f801 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -44,7 +44,7 @@
 # yapf: enable
 from vllm.transformers_utils.processor import cached_get_processor
 from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer
-from vllm.utils import random_uuid
+from vllm.utils import deprecate_kwargs, random_uuid
 
 logger = init_logger(__name__)
 
@@ -329,14 +329,17 @@ def resolve_mistral_chat_template(
             "so it will be ignored.")
         return None
 
+@deprecate_kwargs(
+    "trust_remote_code",
+    additional_message="Please use `model_config.trust_remote_code` instead.",
+)
 def resolve_hf_chat_template(
     tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
     chat_template: Optional[str],
     tools: Optional[list[dict[str, Any]]],
     *,
     model_config: ModelConfig,
-    # For backwards compatibility, keep deprecated args as kwargs
-    **kwargs: dict[str, Any],
+    trust_remote_code: Optional[bool] = None,
 ) -> Optional[str]:
     # 1st priority: The given chat template
     if chat_template is not None:
@@ -388,8 +391,6 @@ def _resolve_chat_template_content_format(
     tokenizer: AnyTokenizer,
     *,
     model_config: ModelConfig,
-    # For backwards compatibility, keep deprecated args as kwargs
-    **kwargs: dict[str, Any],
 ) -> _ChatTemplateContentFormat:
     if isinstance(tokenizer, (PreTrainedTokenizer, PreTrainedTokenizerFast)):
         hf_chat_template = resolve_hf_chat_template(
@@ -434,6 +435,10 @@ def _log_chat_template_content_format(
     )
 
 
+@deprecate_kwargs(
+    "trust_remote_code",
+    additional_message="Please use `model_config.trust_remote_code` instead.",
+)
 def resolve_chat_template_content_format(
     chat_template: Optional[str],
     tools: Optional[list[dict[str, Any]]],
@@ -441,8 +446,7 @@ def resolve_chat_template_content_format(
     tokenizer: AnyTokenizer,
     *,
     model_config: ModelConfig,
-    # For backwards compatibility, keep deprecated args as kwargs
-    **kwargs: dict[str, Any],
+    trust_remote_code: Optional[bool] = None,
 ) -> _ChatTemplateContentFormat:
     detected_format = _resolve_chat_template_content_format(
         chat_template,
@@ -1200,6 +1204,10 @@ def parse_chat_messages_futures(
     return conversation, mm_tracker.all_mm_data()
 
 
+@deprecate_kwargs(
+    "trust_remote_code",
+    additional_message="Please use `model_config.trust_remote_code` instead.",
+)
 def apply_hf_chat_template(
     tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
     conversation: list[ConversationMessage],
@@ -1208,6 +1216,8 @@ def apply_hf_chat_template(
     *,
     model_config: ModelConfig,
     tokenize: bool = False,  # Different from HF's default
+    # Deprecated, explicitly capture here so it doesn't slip into kwargs.
+    trust_remote_code: Optional[bool] = None,
     **kwargs: Any,
 ) -> str:
     hf_chat_template = resolve_hf_chat_template(
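
Note on the pattern used by this series (editor's illustration, not part of the patches): the bare `*` makes `model_config` keyword-only, and a `deprecate_kwargs`-style decorator warns when a retired keyword such as `trust_remote_code` is still supplied. The sketch below is a minimal stand-alone approximation for illustration; it is not vLLM's actual `vllm.utils.deprecate_kwargs`, and the stand-in types for `tokenizer` and `model_config` are placeholders.

# Illustrative sketch only; not the real vllm.utils.deprecate_kwargs.
import warnings
from functools import wraps
from typing import Any, Callable, Optional


def deprecate_kwargs(*deprecated: str, additional_message: str = ""):
    """Warn when any of the named keyword arguments is still passed."""

    def decorator(func: Callable[..., Any]) -> Callable[..., Any]:

        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            for name in deprecated:
                # Warn only when the caller actually supplied a value.
                if kwargs.get(name) is not None:
                    warnings.warn(
                        f"`{name}` is deprecated. {additional_message}",
                        DeprecationWarning,
                        stacklevel=2,
                    )
            return func(*args, **kwargs)

        return wrapper

    return decorator


@deprecate_kwargs(
    "trust_remote_code",
    additional_message="Please use `model_config.trust_remote_code` instead.",
)
def resolve_hf_chat_template(
    tokenizer: str,  # stand-in type for the example
    chat_template: Optional[str],
    tools: Optional[list],
    *,
    model_config: dict,  # stand-in for ModelConfig
    trust_remote_code: Optional[bool] = None,
) -> Optional[str]:
    # Toy body: just echo the template back.
    return chat_template


# New-style call: model_config must be passed by keyword.
resolve_hf_chat_template("tok", None, None, model_config={"trust_remote_code": True})

# The retired kwarg still works but emits a DeprecationWarning.
resolve_hf_chat_template("tok", None, None, model_config={}, trust_remote_code=True)

Because `model_config` sits after `*`, any caller that still passes it positionally now fails with a TypeError, which is why every call site in the series is switched to `model_config=model_config`.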