Skip to content

Commit d30e9fd

Browse files
juliendenize and 0xrushi
authored and committed
Refactor MistralTokenizer (vllm-project#26358)
Signed-off-by: Julien Denize <julien.denize@mistral.ai> Signed-off-by: 0xrushi <6279035+0xrushi@users.noreply.github.com>
1 parent dbb08e8 commit d30e9fd

File tree

18 files changed

+2349
-461
lines changed

18 files changed

+2349
-461
lines changed

docs/features/tool_calling.md

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ Supported models:
145145
Known issues:
146146

147147
1. Mistral 7B struggles to generate parallel tool calls correctly.
148-
2. Mistral's `tokenizer_config.json` chat template requires tool call IDs that are exactly 9 digits, which is
148+
2. **For Transformers tokenization backend only**: Mistral's `tokenizer_config.json` chat template requires tool call IDs that are exactly 9 digits, which is
149149
much shorter than what vLLM generates. Since an exception is thrown when this condition
150150
is not met, the following additional chat templates are provided:
151151

@@ -154,7 +154,14 @@ Known issues:
154154
* <gh-file:examples/tool_chat_template_mistral_parallel.jinja> - this is a "better" version that adds a tool-use system prompt
155155
when tools are provided, that results in much better reliability when working with parallel tool calling.
156156

157-
Recommended flags: `--tool-call-parser mistral --chat-template examples/tool_chat_template_mistral_parallel.jinja`
157+
Recommended flags:
158+
159+
1. To use [mistral-common](https://github.com/mistralai/mistral-common), the official Mistral tokenization backend:
160+
161+
`--tokenizer_mode mistral --config_format mistral --load_format mistral --tool-call-parser mistral`
162+
163+
2. To use the default Transformers tokenization backend:
164+
`--tool-call-parser mistral --chat-template examples/tool_chat_template_mistral_parallel.jinja`
158165

159166
### Llama Models (`llama3_json`)
160167

examples/offline_inference/audio_language.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,12 @@ class ModelRequestData(NamedTuple):
4545
# Voxtral
4646
def run_voxtral(question: str, audio_count: int) -> ModelRequestData:
4747
from mistral_common.audio import Audio
48-
from mistral_common.protocol.instruct.messages import (
48+
from mistral_common.protocol.instruct.chunk import (
4949
AudioChunk,
5050
RawAudio,
5151
TextChunk,
52+
)
53+
from mistral_common.protocol.instruct.messages import (
5254
UserMessage,
5355
)
5456
from mistral_common.protocol.instruct.request import ChatCompletionRequest

requirements/common.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ pyzmq >= 25.0.0
3232
msgspec
3333
gguf >= 0.13.0
3434
importlib_metadata; python_version < '3.10'
35-
mistral_common[image,audio] >= 1.8.2
35+
mistral_common[image,audio] >= 1.8.5
3636
opencv-python-headless >= 4.11.0 # required for video IO
3737
pyyaml
3838
six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12

requirements/nightly_torch_test.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ jiwer # required for audio tests
2323
timm # required for internvl test
2424
transformers_stream_generator # required for qwen-vl test
2525
matplotlib # required for qwen-vl test
26-
mistral_common[image,audio] >= 1.8.2 # required for voxtral test
26+
mistral_common[image,audio] >= 1.8.5 # required for voxtral test
2727
num2words # required for smolvlm test
2828
opencv-python-headless >= 4.11.0 # required for video test
2929
datamodel_code_generator # required for minicpm3 test

requirements/test.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ torchaudio==2.8.0
2929
torchvision==0.23.0
3030
transformers_stream_generator # required for qwen-vl test
3131
matplotlib # required for qwen-vl test
32-
mistral_common[image,audio] >= 1.8.2 # required for voxtral test
32+
mistral_common[image,audio] >= 1.8.5 # required for voxtral test
3333
num2words # required for smolvlm test
3434
open_clip_torch==2.32.0 # Required for nemotron_vl test
3535
opencv-python-headless >= 4.11.0 # required for video test

requirements/test.txt

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -474,7 +474,7 @@ mbstrdecoder==1.1.3
474474
# typepy
475475
mdurl==0.1.2
476476
# via markdown-it-py
477-
mistral-common==1.8.2
477+
mistral-common==1.8.5
478478
# via -r requirements/test.in
479479
mlflow==2.22.0
480480
# via terratorch
@@ -1012,8 +1012,6 @@ sentence-transformers==3.2.1
10121012
# via
10131013
# -r requirements/test.in
10141014
# mteb
1015-
sentencepiece==0.2.0
1016-
# via mistral-common
10171015
setuptools==77.0.3
10181016
# via
10191017
# lightning-utilities

tests/entrypoints/test_chat_utils.py

Lines changed: 2 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,7 @@
66
from typing import Literal, Optional
77

88
import pytest
9-
from mistral_common.tokens.tokenizers.base import SpecialTokenPolicy, SpecialTokens
10-
from mistral_common.tokens.tokenizers.tekken import SpecialTokenInfo, Tekkenizer
9+
from mistral_common.tokens.tokenizers.base import SpecialTokenPolicy
1110

1211
from vllm.assets.audio import AudioAsset
1312
from vllm.assets.image import ImageAsset
@@ -2119,34 +2118,9 @@ def test_apply_mistral_chat_template_thinking_chunk():
21192118
},
21202119
{"role": "user", "content": "Thanks, what is 3+3?"},
21212120
]
2122-
2123-
# TODO(Julien): upon model release change to a tokenizer already configured.
2124-
# =================================================================
21252121
mistral_tokenizer = MistralTokenizer.from_pretrained(
2126-
"mistralai/Devstral-Small-2507"
2127-
)
2128-
assert isinstance(mistral_tokenizer.tokenizer, Tekkenizer)
2129-
# Add think special tokens to the tokenizer
2130-
mistral_tokenizer.tokenizer._all_special_tokens[35] = SpecialTokenInfo(
2131-
rank=35, is_control=True, token_str=SpecialTokens.begin_think.value
2122+
"mistralai/Magistral-Small-2509"
21322123
)
2133-
mistral_tokenizer.tokenizer._all_special_tokens[36] = SpecialTokenInfo(
2134-
rank=36, is_control=True, token_str=SpecialTokens.end_think.value
2135-
)
2136-
mistral_tokenizer.tokenizer._special_tokens_reverse_vocab = {
2137-
k: v
2138-
for k, v in mistral_tokenizer.tokenizer._special_tokens_reverse_vocab.items()
2139-
if v not in {35, 36}
2140-
}
2141-
mistral_tokenizer.tokenizer._special_tokens_reverse_vocab[
2142-
SpecialTokens.begin_think.value
2143-
] = 35
2144-
mistral_tokenizer.tokenizer._special_tokens_reverse_vocab[
2145-
SpecialTokens.end_think.value
2146-
] = 36
2147-
mistral_tokenizer.instruct.BEGIN_THINK = 35
2148-
mistral_tokenizer.instruct.END_THINK = 36
2149-
# =================================================================
21502124

21512125
tokens_ids = apply_mistral_chat_template(
21522126
mistral_tokenizer, messages, chat_template=None, tools=None

tests/models/multimodal/generation/test_pixtral.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import pytest
88
from mistral_common.multimodal import download_image
9-
from mistral_common.protocol.instruct.messages import ImageURLChunk
9+
from mistral_common.protocol.instruct.chunk import ImageURLChunk
1010
from mistral_common.protocol.instruct.request import ChatCompletionRequest
1111
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
1212
from mistral_common.tokens.tokenizers.multimodal import image_from_chunk

tests/models/multimodal/generation/test_voxtral.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,8 @@
66
import pytest
77
import pytest_asyncio
88
from mistral_common.audio import Audio
9-
from mistral_common.protocol.instruct.messages import (
10-
AudioChunk,
11-
RawAudio,
12-
TextChunk,
13-
UserMessage,
14-
)
9+
from mistral_common.protocol.instruct.chunk import AudioChunk, RawAudio, TextChunk
10+
from mistral_common.protocol.instruct.messages import UserMessage
1511

1612
from vllm.transformers_utils.tokenizer import MistralTokenizer
1713

tests/models/multimodal/processing/test_common.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66

77
import numpy as np
88
import pytest
9-
from mistral_common.protocol.instruct.messages import ImageChunk, TextChunk, UserMessage
9+
from mistral_common.protocol.instruct.chunk import ImageChunk, TextChunk
10+
from mistral_common.protocol.instruct.messages import UserMessage
1011
from mistral_common.protocol.instruct.request import ChatCompletionRequest
1112
from PIL import Image
1213

0 commit comments

Comments (0)