Run `pytest tests/models/test_mistral.py`.
"""
import json
- import uuid
from dataclasses import asdict
from typing import TYPE_CHECKING, Any, Optional

from mistral_common.tokens.tokenizers.multimodal import image_from_chunk
from transformers import AutoProcessor

- from vllm import (EngineArgs, LLMEngine, RequestOutput, SamplingParams,
-                   TextPrompt, TokensPrompt)
+ from vllm import RequestOutput, SamplingParams, TextPrompt, TokensPrompt
from vllm.multimodal import MultiModalDataBuiltins
from vllm.multimodal.inputs import PlaceholderRange
from vllm.sequence import Logprob, SampleLogprobs

if TYPE_CHECKING:
    from _typeshed import StrPath

- MODELS = ["mistralai/Pixtral-12B-2409"]
+ PIXTRAL_ID = "mistralai/Pixtral-12B-2409"
+ MISTRAL_SMALL_3_1_ID = "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
+
+ MODELS = [PIXTRAL_ID, MISTRAL_SMALL_3_1_ID]
+
IMG_URLS = [
    "https://picsum.photos/id/237/400/300",
    "https://picsum.photos/id/231/200/300",
@@ -125,8 +127,10 @@ def _create_engine_inputs_hf(urls: list[str]) -> TextPrompt:
FIXTURES_PATH = VLLM_PATH / "tests/models/fixtures"
assert FIXTURES_PATH.exists()

- FIXTURE_LOGPROBS_CHAT = FIXTURES_PATH / "pixtral_chat.json"
- FIXTURE_LOGPROBS_ENGINE = FIXTURES_PATH / "pixtral_chat_engine.json"
+ FIXTURE_LOGPROBS_CHAT = {
+     PIXTRAL_ID: FIXTURES_PATH / "pixtral_chat.json",
+     MISTRAL_SMALL_3_1_ID: FIXTURES_PATH / "mistral_small_3_chat.json",
+ }

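# Reference output format: one (token ids, generated text, optional sample logprobs) tuple per request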
OutputsLogprobs = list[tuple[list[int], str, Optional[SampleLogprobs]]]

@@ -166,12 +170,12 @@ def test_chat(
    model: str,
    dtype: str,
) -> None:
-     EXPECTED_CHAT_LOGPROBS = load_outputs_w_logprobs(FIXTURE_LOGPROBS_CHAT)
+     EXPECTED_CHAT_LOGPROBS = load_outputs_w_logprobs(
+         FIXTURE_LOGPROBS_CHAT[model])
    with vllm_runner(
            model,
            dtype=dtype,
            tokenizer_mode="mistral",
-             enable_chunked_prefill=False,
            max_model_len=max_model_len,
            limit_mm_per_prompt=LIMIT_MM_PER_PROMPT,
    ) as vllm_model:
@@ -183,70 +187,40 @@ def test_chat(
            outputs.extend(output)

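    # Collect generation logprobs from the finished requests to compare against the fixture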
    logprobs = vllm_runner._final_steps_generate_w_logprobs(outputs)
+     # Remove last `None` prompt_logprobs to compare with fixture
+     for i in range(len(logprobs)):
+         assert logprobs[i][-1] is None
+         logprobs[i] = logprobs[i][:-1]
    check_logprobs_close(outputs_0_lst=EXPECTED_CHAT_LOGPROBS,
                         outputs_1_lst=logprobs,
                         name_0="h100_ref",
                         name_1="output")


- @large_gpu_test(min_gb=80)
- @pytest.mark.parametrize("model", MODELS)
- @pytest.mark.parametrize("dtype", ["bfloat16"])
- def test_model_engine(vllm_runner, model: str, dtype: str) -> None:
-     EXPECTED_ENGINE_LOGPROBS = load_outputs_w_logprobs(FIXTURE_LOGPROBS_ENGINE)
-     args = EngineArgs(
-         model=model,
-         tokenizer_mode="mistral",
-         enable_chunked_prefill=False,
-         limit_mm_per_prompt=LIMIT_MM_PER_PROMPT,
-         dtype=dtype,
-     )
-     engine = LLMEngine.from_engine_args(args)
-
-     engine.add_request(uuid.uuid4().hex, ENGINE_INPUTS[0], SAMPLING_PARAMS)
-     engine.add_request(uuid.uuid4().hex, ENGINE_INPUTS[1], SAMPLING_PARAMS)
-
-     outputs = []
-     count = 0
-     while True:
-         out = engine.step()
-         count += 1
-         for request_output in out:
-             if request_output.finished:
-                 outputs.append(request_output)
-
-         if count == 2:
-             engine.add_request(uuid.uuid4().hex, ENGINE_INPUTS[2],
-                                SAMPLING_PARAMS)
-         if not engine.has_unfinished_requests():
-             break
-
-     logprobs = vllm_runner._final_steps_generate_w_logprobs(outputs)
-     check_logprobs_close(outputs_0_lst=EXPECTED_ENGINE_LOGPROBS,
-                          outputs_1_lst=logprobs,
-                          name_0="h100_ref",
-                          name_1="output")
-
-
@large_gpu_test(min_gb=48)
@pytest.mark.parametrize(
    "prompt,expected_ranges",
    [(_create_engine_inputs_hf(IMG_URLS[:1]), [{
-         "offset": 10,
+         "offset": 11,
        "length": 494
    }]),
     (_create_engine_inputs_hf(IMG_URLS[1:4]), [{
-         "offset": 10,
+         "offset": 11,
        "length": 266
    }, {
-         "offset": 276,
+         "offset": 277,
        "length": 1056
    }, {
-         "offset": 1332,
+         "offset": 1333,
        "length": 418
    }])])
- def test_multi_modal_placeholders(
-         vllm_runner, prompt, expected_ranges: list[PlaceholderRange]) -> None:
+ def test_multi_modal_placeholders(vllm_runner, prompt,
+                                   expected_ranges: list[PlaceholderRange],
+                                   monkeypatch) -> None:
+
+     # This placeholder checking test only works with V0 engine
+     # where `multi_modal_placeholders` is returned with `RequestOutput`
+     monkeypatch.setenv("VLLM_USE_V1", "0")
    with vllm_runner(
            "mistral-community/pixtral-12b",
            max_model_len=8192,