
Commit a9cfefd

lk-chen authored and Yuqi Zhang committed
[Fix] Move "model_config" as keyword args in chat_utils.py (vllm-project#18098)
Signed-off-by: Linkun <github@lkchen.net>
Signed-off-by: Yuqi Zhang <yuqizhang@google.com>
1 parent 1ad2cd4 commit a9cfefd

File tree

6 files changed: +42 -23 lines

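In short, the commit makes model_config a keyword-only argument of the chat-template helpers in vllm/entrypoints/chat_utils.py (resolve_hf_chat_template, resolve_chat_template_content_format, and apply_hf_chat_template) and updates every call site accordingly. A minimal before/after sketch of a call site, using the argument names shown in the diffs below:

    # Before: model_config and tokenizer were passed positionally.
    prompt = apply_hf_chat_template(
        model_config,
        tokenizer,
        conversation=conversation,
        chat_template=None,
        tools=None,
        add_generation_prompt=True,
    )

    # After: tokenizer is passed by name and model_config is keyword-only.
    prompt = apply_hf_chat_template(
        tokenizer=tokenizer,
        conversation=conversation,
        chat_template=None,
        model_config=model_config,
        tools=None,
        add_generation_prompt=True,
    )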

tests/entrypoints/openai/test_chat_template.py

Lines changed: 2 additions & 2 deletions
@@ -122,10 +122,10 @@ def test_get_gen_prompt(model, template, add_generation_prompt,
 
     # Call the function and get the result
     result = apply_hf_chat_template(
-        model_config,
-        tokenizer,
+        tokenizer=tokenizer,
         conversation=mock_request.messages,
         chat_template=mock_request.chat_template or template_content,
+        model_config=model_config,
         tools=None,
         add_generation_prompt=mock_request.add_generation_prompt,
         continue_final_message=mock_request.continue_final_message,

tests/entrypoints/test_chat_utils.py

Lines changed: 5 additions & 5 deletions
@@ -793,10 +793,10 @@ def get_conversation(is_hf: bool):
     )
 
     vllm_result = apply_hf_chat_template(
-        model_config,
-        tokenizer,
+        tokenizer=tokenizer,
         conversation=conversation,
         chat_template=None,
+        model_config=model_config,
         tools=None,
         add_generation_prompt=True,
     )
@@ -903,11 +903,11 @@ def test_resolve_content_format_hf_defined(model, expected_format):
     print(_try_extract_ast(chat_template))
 
     resolved_format = resolve_chat_template_content_format(
-        model_config,
         None,  # Test detecting the tokenizer's chat_template
         None,
         "auto",
         tokenizer,
+        model_config=model_config,
     )
 
     assert resolved_format == expected_format
@@ -962,11 +962,11 @@ def test_resolve_content_format_fallbacks(model, expected_format):
     print(_try_extract_ast(chat_template))
 
     resolved_format = resolve_chat_template_content_format(
-        model_config,
         None,  # Test detecting the tokenizer's chat_template
         None,
         "auto",
         tokenizer,
+        model_config=model_config,
     )
 
     assert resolved_format == expected_format
@@ -1021,11 +1021,11 @@ def test_resolve_content_format_examples(template_path, expected_format):
     print(_try_extract_ast(chat_template))
 
     resolved_format = resolve_chat_template_content_format(
-        model_config,
         chat_template,
         None,
         "auto",
         dummy_tokenizer,
+        model_config=model_config,
     )
 
     assert resolved_format == expected_format

vllm/entrypoints/chat_utils.py

Lines changed: 27 additions & 8 deletions
@@ -44,7 +44,7 @@
 # yapf: enable
 from vllm.transformers_utils.processor import cached_get_processor
 from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer
-from vllm.utils import random_uuid
+from vllm.utils import deprecate_kwargs, random_uuid
 
 logger = init_logger(__name__)
 
@@ -329,11 +329,17 @@ def resolve_mistral_chat_template(
                 "so it will be ignored.")
     return None
 
+@deprecate_kwargs(
+    "trust_remote_code",
+    additional_message="Please use `model_config.trust_remote_code` instead.",
+)
 def resolve_hf_chat_template(
-    model_config: ModelConfig,
     tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
     chat_template: Optional[str],
     tools: Optional[list[dict[str, Any]]],
+    *,
+    model_config: ModelConfig,
+    trust_remote_code: Optional[bool] = None,
 ) -> Optional[str]:
     # 1st priority: The given chat template
     if chat_template is not None:
@@ -379,18 +385,19 @@ def resolve_hf_chat_template(
 
 
 def _resolve_chat_template_content_format(
-    model_config: ModelConfig,
     chat_template: Optional[str],
     tools: Optional[list[dict[str, Any]]],
     given_format: ChatTemplateContentFormatOption,
     tokenizer: AnyTokenizer,
+    *,
+    model_config: ModelConfig,
 ) -> _ChatTemplateContentFormat:
     if isinstance(tokenizer, (PreTrainedTokenizer, PreTrainedTokenizerFast)):
         hf_chat_template = resolve_hf_chat_template(
-            model_config,
             tokenizer,
             chat_template=chat_template,
             tools=tools,
+            model_config=model_config,
         )
     else:
         hf_chat_template = None
@@ -428,19 +435,25 @@ def _log_chat_template_content_format(
     )
 
 
+@deprecate_kwargs(
+    "trust_remote_code",
+    additional_message="Please use `model_config.trust_remote_code` instead.",
+)
 def resolve_chat_template_content_format(
-    model_config: ModelConfig,
     chat_template: Optional[str],
     tools: Optional[list[dict[str, Any]]],
     given_format: ChatTemplateContentFormatOption,
     tokenizer: AnyTokenizer,
+    *,
+    model_config: ModelConfig,
+    trust_remote_code: Optional[bool] = None,
 ) -> _ChatTemplateContentFormat:
     detected_format = _resolve_chat_template_content_format(
-        model_config,
         chat_template,
         tools,
         given_format,
         tokenizer,
+        model_config=model_config,
     )
 
     _log_chat_template_content_format(
@@ -1191,21 +1204,27 @@ def parse_chat_messages_futures(
     return conversation, mm_tracker.all_mm_data()
 
 
+@deprecate_kwargs(
+    "trust_remote_code",
+    additional_message="Please use `model_config.trust_remote_code` instead.",
+)
 def apply_hf_chat_template(
-    model_config: ModelConfig,
     tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
     conversation: list[ConversationMessage],
     chat_template: Optional[str],
     tools: Optional[list[dict[str, Any]]],
     *,
+    model_config: ModelConfig,
     tokenize: bool = False,  # Different from HF's default
+    # Deprecated, explicitly capture here so it doesn't slip into kwargs.
+    trust_remote_code: Optional[bool] = None,
     **kwargs: Any,
 ) -> str:
     hf_chat_template = resolve_hf_chat_template(
-        model_config,
         tokenizer,
         chat_template=chat_template,
         tools=tools,
+        model_config=model_config,
     )
 
     if hf_chat_template is None:
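The deprecate_kwargs decorator newly imported from vllm.utils is what keeps old trust_remote_code callers working while they migrate to model_config.trust_remote_code. The snippet below is only an illustrative sketch of the pattern such a decorator typically follows (warn when the named keyword is supplied, then call through); it is not vllm's actual implementation.

import functools
import warnings
from typing import Any, Callable, Optional


def deprecate_kwargs_sketch(*names: str,
                            additional_message: Optional[str] = None
                            ) -> Callable[[Callable], Callable]:
    """Illustrative stand-in for vllm.utils.deprecate_kwargs, not the real code."""

    def decorator(fn: Callable) -> Callable:

        @functools.wraps(fn)
        def inner(*args: Any, **kwargs: Any) -> Any:
            # Warn for each deprecated keyword the caller actually supplied.
            for name in names:
                if kwargs.get(name) is not None:
                    msg = f"The keyword argument '{name}' is deprecated."
                    if additional_message:
                        msg += f" {additional_message}"
                    warnings.warn(msg, DeprecationWarning, stacklevel=2)
            return fn(*args, **kwargs)

        return inner

    return decorator

Because apply_hf_chat_template still declares trust_remote_code explicitly rather than letting it fall into **kwargs, an old call such as apply_hf_chat_template(..., trust_remote_code=True) only triggers the deprecation warning instead of being forwarded to the underlying HF tokenizer.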

vllm/entrypoints/llm.py

Lines changed: 3 additions & 3 deletions
@@ -731,11 +731,11 @@ def chat(
         tokenizer = self.get_tokenizer(lora_request)
         model_config = self.llm_engine.get_model_config()
         resolved_content_format = resolve_chat_template_content_format(
-            model_config,
             chat_template,
             tools,
             chat_template_content_format,
             tokenizer,
+            model_config=model_config,
         )
 
         _chat_template_kwargs: dict[str, Any] = dict(
@@ -767,9 +767,9 @@ def chat(
                 )
             else:
                 prompt_str = apply_hf_chat_template(
-                    model_config,
-                    tokenizer,
+                    tokenizer=tokenizer,
                     conversation=conversation,
+                    model_config=model_config,
                     **_chat_template_kwargs,
                 )
                 # Special tokens are already included in chat templates so

vllm/entrypoints/openai/api_server.py

Lines changed: 2 additions & 2 deletions
@@ -971,10 +971,10 @@ async def init_app_state(
                 chat_template=resolved_chat_template)
         else:
             hf_chat_template = resolve_hf_chat_template(
-                vllm_config.model_config,
-                tokenizer,
+                tokenizer=tokenizer,
                 chat_template=None,
                 tools=None,
+                model_config=vllm_config.model_config,
             )
 
             if hf_chat_template != resolved_chat_template:

vllm/entrypoints/openai/serving_engine.py

Lines changed: 3 additions & 3 deletions
@@ -670,11 +670,11 @@ async def _preprocess_chat(
         model_config = self.model_config
 
         resolved_content_format = resolve_chat_template_content_format(
-            model_config,
             chat_template,
             tool_dicts,
             chat_template_content_format,
             tokenizer,
+            model_config=model_config,
         )
         conversation, mm_data_future = parse_chat_messages_futures(
             messages,
@@ -701,9 +701,9 @@ async def _preprocess_chat(
             )
         else:
             request_prompt = apply_hf_chat_template(
-                model_config,
-                tokenizer,
+                tokenizer=tokenizer,
                 conversation=conversation,
+                model_config=model_config,
                 **_chat_template_kwargs,
             )
 