Commit f863ffc

[Mistral-Small 3.1] Update docs and tests (#14977)
Signed-off-by: Roger Wang <ywang@roblox.com>
Co-authored-by: Roger Wang <ywang@roblox.com>
1 parent 400d483 commit f863ffc

5 files changed (+34, −60 lines)

docs/source/models/supported_models.md

Lines changed: 1 addition & 1 deletion
@@ -879,7 +879,7 @@ See [this page](#generative-models) for more information on how to use generativ
 - * `PixtralForConditionalGeneration`
   * Pixtral
   * T + I<sup>+</sup>
-  * `mistralai/Pixtral-12B-2409`, `mistral-community/pixtral-12b`, etc.
+  * `mistralai/Mistral-Small-3.1-24B-Instruct-2503`, `mistral-community/pixtral-12b`, etc.
   *
   * ✅︎
   * ✅︎
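For reference, the row above now pairs `PixtralForConditionalGeneration` with the Mistral-Small-3.1 checkpoint. A minimal offline sketch of loading that checkpoint with vLLM's chat API follows (illustrative only, not part of this commit; argument names follow the current vLLM Python API and may differ across versions):

```python
# Illustrative sketch only; assumes the standard vLLM offline chat API.
from vllm import LLM
from vllm.sampling_params import SamplingParams

llm = LLM(
    model="mistralai/Mistral-Small-3.1-24B-Instruct-2503",
    tokenizer_mode="mistral",          # use the mistral_common tokenizer
    limit_mm_per_prompt={"image": 4},  # cap images per prompt
    max_model_len=16384,
)

messages = [{
    "role": "user",
    "content": [
        {"type": "text", "text": "Describe this image in one sentence."},
        {"type": "image_url",
         "image_url": {"url": "https://picsum.photos/id/237/400/300"}},
    ],
}]

outputs = llm.chat(messages, sampling_params=SamplingParams(max_tokens=256))
print(outputs[0].outputs[0].text)
```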

examples/offline_inference/pixtral.py

Lines changed: 5 additions & 5 deletions
@@ -6,14 +6,14 @@
 from vllm import LLM
 from vllm.sampling_params import SamplingParams
 
-# This script is an offline demo for running Pixtral.
+# This script is an offline demo for running Mistral-Small-3
 #
 # If you want to run a server/client setup, please follow this code:
 #
 # - Server:
 #
 # ```bash
-# vllm serve mistralai/Pixtral-12B-2409 --tokenizer-mode mistral --limit-mm-per-prompt 'image=4' --max-model-len 16384
+# vllm serve mistralai/Mistral-Small-3.1-24B-Instruct-2503 --tokenizer-mode mistral --limit-mm-per-prompt 'image=4' --max-model-len 16384
 # ```
 #
 # - Client:
@@ -23,7 +23,7 @@
 # --header 'Content-Type: application/json' \
 # --header 'Authorization: Bearer token' \
 # --data '{
-#     "model": "mistralai/Pixtral-12B-2409",
+#     "model": "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
 #     "messages": [
 #       {
 #         "role": "user",
@@ -44,7 +44,7 @@
 
 
 def run_simple_demo(args: argparse.Namespace):
-    model_name = "mistralai/Pixtral-12B-2409"
+    model_name = "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
     sampling_params = SamplingParams(max_tokens=8192)
 
     # Lower max_model_len and/or max_num_seqs on low-VRAM GPUs.
@@ -83,7 +83,7 @@ def run_simple_demo(args: argparse.Namespace):
 
 
 def run_advanced_demo(args: argparse.Namespace):
-    model_name = "mistralai/Pixtral-12B-2409"
+    model_name = "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
     max_img_per_msg = 5
     max_tokens_per_img = 4096
 
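The updated header comments keep the curl-based client example. A Python equivalent is sketched below (illustrative only, not part of the script; it assumes the `vllm serve` command from the comments is running on localhost:8000 and that the `openai` package is installed):

```python
# Illustrative sketch, not part of this example: a Python client equivalent to
# the curl command in the header comments. Assumes the `vllm serve` process
# from the comments is listening on localhost:8000.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="token")

response = client.chat.completions.create(
    model="mistralai/Mistral-Small-3.1-24B-Instruct-2503",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this image in one sentence."},
            {"type": "image_url",
             "image_url": {"url": "https://picsum.photos/id/237/400/300"}},
        ],
    }],
    max_tokens=256,
)
print(response.choices[0].message.content)
```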

tests/models/decoder_only/vision_language/test_pixtral.py

Lines changed: 27 additions & 53 deletions
@@ -4,7 +4,6 @@
 Run `pytest tests/models/test_mistral.py`.
 """
 import json
-import uuid
 from dataclasses import asdict
 from typing import TYPE_CHECKING, Any, Optional
 
@@ -16,8 +15,7 @@
 from mistral_common.tokens.tokenizers.multimodal import image_from_chunk
 from transformers import AutoProcessor
 
-from vllm import (EngineArgs, LLMEngine, RequestOutput, SamplingParams,
-                  TextPrompt, TokensPrompt)
+from vllm import RequestOutput, SamplingParams, TextPrompt, TokensPrompt
 from vllm.multimodal import MultiModalDataBuiltins
 from vllm.multimodal.inputs import PlaceholderRange
 from vllm.sequence import Logprob, SampleLogprobs
@@ -28,7 +26,11 @@
 if TYPE_CHECKING:
     from _typeshed import StrPath
 
-MODELS = ["mistralai/Pixtral-12B-2409"]
+PIXTRAL_ID = "mistralai/Pixtral-12B-2409"
+MISTRAL_SMALL_3_1_ID = "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
+
+MODELS = [PIXTRAL_ID, MISTRAL_SMALL_3_1_ID]
+
 IMG_URLS = [
     "https://picsum.photos/id/237/400/300",
     "https://picsum.photos/id/231/200/300",
@@ -125,8 +127,10 @@ def _create_engine_inputs_hf(urls: list[str]) -> TextPrompt:
 FIXTURES_PATH = VLLM_PATH / "tests/models/fixtures"
 assert FIXTURES_PATH.exists()
 
-FIXTURE_LOGPROBS_CHAT = FIXTURES_PATH / "pixtral_chat.json"
-FIXTURE_LOGPROBS_ENGINE = FIXTURES_PATH / "pixtral_chat_engine.json"
+FIXTURE_LOGPROBS_CHAT = {
+    PIXTRAL_ID: FIXTURES_PATH / "pixtral_chat.json",
+    MISTRAL_SMALL_3_1_ID: FIXTURES_PATH / "mistral_small_3_chat.json",
+}
 
 OutputsLogprobs = list[tuple[list[int], str, Optional[SampleLogprobs]]]
 
@@ -166,12 +170,12 @@ def test_chat(
     model: str,
     dtype: str,
 ) -> None:
-    EXPECTED_CHAT_LOGPROBS = load_outputs_w_logprobs(FIXTURE_LOGPROBS_CHAT)
+    EXPECTED_CHAT_LOGPROBS = load_outputs_w_logprobs(
+        FIXTURE_LOGPROBS_CHAT[model])
     with vllm_runner(
         model,
         dtype=dtype,
         tokenizer_mode="mistral",
-        enable_chunked_prefill=False,
         max_model_len=max_model_len,
         limit_mm_per_prompt=LIMIT_MM_PER_PROMPT,
     ) as vllm_model:
@@ -183,70 +187,40 @@ def test_chat(
             outputs.extend(output)
 
     logprobs = vllm_runner._final_steps_generate_w_logprobs(outputs)
+    # Remove last `None` prompt_logprobs to compare with fixture
+    for i in range(len(logprobs)):
+        assert logprobs[i][-1] is None
+        logprobs[i] = logprobs[i][:-1]
     check_logprobs_close(outputs_0_lst=EXPECTED_CHAT_LOGPROBS,
                          outputs_1_lst=logprobs,
                          name_0="h100_ref",
                          name_1="output")
 
 
-@large_gpu_test(min_gb=80)
-@pytest.mark.parametrize("model", MODELS)
-@pytest.mark.parametrize("dtype", ["bfloat16"])
-def test_model_engine(vllm_runner, model: str, dtype: str) -> None:
-    EXPECTED_ENGINE_LOGPROBS = load_outputs_w_logprobs(FIXTURE_LOGPROBS_ENGINE)
-    args = EngineArgs(
-        model=model,
-        tokenizer_mode="mistral",
-        enable_chunked_prefill=False,
-        limit_mm_per_prompt=LIMIT_MM_PER_PROMPT,
-        dtype=dtype,
-    )
-    engine = LLMEngine.from_engine_args(args)
-
-    engine.add_request(uuid.uuid4().hex, ENGINE_INPUTS[0], SAMPLING_PARAMS)
-    engine.add_request(uuid.uuid4().hex, ENGINE_INPUTS[1], SAMPLING_PARAMS)
-
-    outputs = []
-    count = 0
-    while True:
-        out = engine.step()
-        count += 1
-        for request_output in out:
-            if request_output.finished:
-                outputs.append(request_output)
-
-        if count == 2:
-            engine.add_request(uuid.uuid4().hex, ENGINE_INPUTS[2],
-                               SAMPLING_PARAMS)
-        if not engine.has_unfinished_requests():
-            break
-
-    logprobs = vllm_runner._final_steps_generate_w_logprobs(outputs)
-    check_logprobs_close(outputs_0_lst=EXPECTED_ENGINE_LOGPROBS,
-                         outputs_1_lst=logprobs,
-                         name_0="h100_ref",
-                         name_1="output")
-
-
 @large_gpu_test(min_gb=48)
 @pytest.mark.parametrize(
     "prompt,expected_ranges",
     [(_create_engine_inputs_hf(IMG_URLS[:1]), [{
-        "offset": 10,
+        "offset": 11,
         "length": 494
     }]),
      (_create_engine_inputs_hf(IMG_URLS[1:4]), [{
-        "offset": 10,
+        "offset": 11,
        "length": 266
     }, {
-        "offset": 276,
+        "offset": 277,
         "length": 1056
     }, {
-        "offset": 1332,
+        "offset": 1333,
         "length": 418
     }])])
-def test_multi_modal_placeholders(
-        vllm_runner, prompt, expected_ranges: list[PlaceholderRange]) -> None:
+def test_multi_modal_placeholders(vllm_runner, prompt,
+                                  expected_ranges: list[PlaceholderRange],
+                                  monkeypatch) -> None:
+
+    # This placeholder checking test only works with V0 engine
+    # where `multi_modal_placeholders` is returned with `RequestOutput`
+    monkeypatch.setenv("VLLM_USE_V1", "0")
     with vllm_runner(
         "mistral-community/pixtral-12b",
         max_model_len=8192,
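The rewritten test pins the V0 engine through pytest's built-in `monkeypatch` fixture. A minimal standalone sketch of that pattern follows (illustrative only; the test name and assertion below are hypothetical and not part of this commit):

```python
# Illustrative sketch of the monkeypatch pattern used above: `monkeypatch` is
# pytest's built-in fixture, and setenv applies only for the duration of this
# test, restoring the previous environment afterwards.
import os


def test_uses_v0_engine(monkeypatch):
    monkeypatch.setenv("VLLM_USE_V1", "0")   # force the V0 code path
    assert os.environ["VLLM_USE_V1"] == "0"  # visible to code under test
    # ...run the engine-dependent assertions here...
```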

tests/models/fixtures/mistral_small_3_chat.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

tests/models/fixtures/pixtral_chat_engine.json

Lines changed: 0 additions & 1 deletion
This file was deleted.
