Commit 9a61e63

move model_config after *

Signed-off-by: Linkun <github@lkchen.net>
1 parent 55aa7af
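This commit moves model_config behind a bare * in each signature, making it a keyword-only parameter, and adds a trailing **kwargs so that deprecated arguments from older keyword-based callers are still accepted. A minimal sketch of that pattern follows; the function name and the warning policy are illustrative assumptions, not vLLM's actual code (the real signatures are in the diffs below):

import warnings
from typing import Any, Optional


def resolve_template(
    chat_template: Optional[str],
    tokenizer: Any,
    *,  # parameters after this marker can only be passed by keyword
    model_config: Any,
    # For backwards compatibility, deprecated args land in **kwargs
    **kwargs: Any,
) -> Optional[str]:
    if kwargs:
        # Hypothetical policy: warn on deprecated keyword arguments
        # instead of failing, so old callers keep working for now.
        warnings.warn(
            f"Ignoring deprecated arguments: {sorted(kwargs)}",
            DeprecationWarning,
            stacklevel=2,
        )
    return chat_template

With this shape, resolve_template(tmpl, tok, model_config=cfg) works, while the old positional form resolve_template(tmpl, tok, cfg) raises TypeError, which is what pushes callers onto the keyword style seen in the diffs below.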

File tree

4 files changed: +19 −10 lines

tests/entrypoints/test_chat_utils.py

Lines changed: 3 additions & 3 deletions
@@ -903,11 +903,11 @@ def test_resolve_content_format_hf_defined(model, expected_format):
     print(_try_extract_ast(chat_template))

     resolved_format = resolve_chat_template_content_format(
-        model_config,
         None,  # Test detecting the tokenizer's chat_template
         None,
         "auto",
         tokenizer,
+        model_config=model_config,
     )

     assert resolved_format == expected_format
@@ -962,11 +962,11 @@ def test_resolve_content_format_fallbacks(model, expected_format):
     print(_try_extract_ast(chat_template))

     resolved_format = resolve_chat_template_content_format(
-        model_config,
         None,  # Test detecting the tokenizer's chat_template
         None,
         "auto",
         tokenizer,
+        model_config=model_config,
     )

     assert resolved_format == expected_format
@@ -1021,11 +1021,11 @@ def test_resolve_content_format_examples(template_path, expected_format):
     print(_try_extract_ast(chat_template))

     resolved_format = resolve_chat_template_content_format(
-        model_config,
         chat_template,
         None,
         "auto",
         dummy_tokenizer,
+        model_config=model_config,
     )

     assert resolved_format == expected_format

vllm/entrypoints/chat_utils.py

Lines changed: 14 additions & 5 deletions
@@ -330,10 +330,13 @@ def resolve_mistral_chat_template(
     return None

 def resolve_hf_chat_template(
-    model_config: ModelConfig,
     tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
     chat_template: Optional[str],
     tools: Optional[list[dict[str, Any]]],
+    *,
+    model_config: ModelConfig,
+    # For backwards compatibility, keep deprecated args as kwargs
+    **kwargs: dict[str, Any],
 ) -> Optional[str]:
     # 1st priority: The given chat template
     if chat_template is not None:
@@ -379,18 +382,21 @@


 def _resolve_chat_template_content_format(
-    model_config: ModelConfig,
     chat_template: Optional[str],
     tools: Optional[list[dict[str, Any]]],
     given_format: ChatTemplateContentFormatOption,
     tokenizer: AnyTokenizer,
+    *,
+    model_config: ModelConfig,
+    # For backwards compatibility, keep deprecated args as kwargs
+    **kwargs: dict[str, Any],
 ) -> _ChatTemplateContentFormat:
     if isinstance(tokenizer, (PreTrainedTokenizer, PreTrainedTokenizerFast)):
         hf_chat_template = resolve_hf_chat_template(
-            model_config,
             tokenizer,
             chat_template=chat_template,
             tools=tools,
+            model_config=model_config,
         )
     else:
         hf_chat_template = None
@@ -429,18 +435,21 @@ def _log_chat_template_content_format(


 def resolve_chat_template_content_format(
-    model_config: ModelConfig,
     chat_template: Optional[str],
     tools: Optional[list[dict[str, Any]]],
     given_format: ChatTemplateContentFormatOption,
     tokenizer: AnyTokenizer,
+    *,
+    model_config: ModelConfig,
+    # For backwards compatibility, keep deprecated args as kwargs
+    **kwargs: dict[str, Any],
 ) -> _ChatTemplateContentFormat:
     detected_format = _resolve_chat_template_content_format(
-        model_config,
         chat_template,
         tools,
         given_format,
         tokenizer,
+        model_config=model_config,
     )

     _log_chat_template_content_format(
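At the call sites updated below and in the tests above, the effect is that model_config moves from the leading positional argument to a required keyword argument. A before/after sketch using the post-commit signature of resolve_chat_template_content_format (the "auto" format value mirrors the test calls above):

# Before this commit: model_config led the positional arguments.
# resolved = resolve_chat_template_content_format(
#     model_config, chat_template, tools, "auto", tokenizer)

# After this commit: model_config must be passed by keyword.
resolved = resolve_chat_template_content_format(
    chat_template,
    tools,
    "auto",
    tokenizer,
    model_config=model_config,
)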

vllm/entrypoints/llm.py

Lines changed: 1 addition & 1 deletion
@@ -731,11 +731,11 @@ def chat(
         tokenizer = self.get_tokenizer(lora_request)
         model_config = self.llm_engine.get_model_config()
         resolved_content_format = resolve_chat_template_content_format(
-            model_config,
             chat_template,
             tools,
             chat_template_content_format,
             tokenizer,
+            model_config=model_config,
         )

         _chat_template_kwargs: dict[str, Any] = dict(

vllm/entrypoints/openai/serving_engine.py

Lines changed: 1 addition & 1 deletion
@@ -670,11 +670,11 @@ async def _preprocess_chat(
         model_config = self.model_config

         resolved_content_format = resolve_chat_template_content_format(
-            model_config,
             chat_template,
             tool_dicts,
             chat_template_content_format,
             tokenizer,
+            model_config=model_config,
         )
         conversation, mm_data_future = parse_chat_messages_futures(
             messages,
