Skip to content

Commit c4f0811

Browse files
inc-jeong and USER
authored and committed
[Bugfix] reasoning_parser parameter handling in run_batch.py (vllm-project#26225)
Signed-off-by: inc-jeong <inc.jeong@navercorp.com> Signed-off-by: InChang Jeong <inc.jeong@navercorp.com> Co-authored-by: USER <user@AL02367916.local> Signed-off-by: xuebwang-amd <xuebwang@amd.com>
1 parent 341b2bf commit c4f0811

File tree

2 files changed

+67
-0
lines changed

2 files changed

+67
-0
lines changed

tests/entrypoints/openai/test_run_batch.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@
3939
{"custom_id": "request-2", "method": "POST", "url": "/v1/rerank", "body": {"model": "BAAI/bge-reranker-v2-m3", "query": "What is the capital of France?", "documents": ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]}}
4040
{"custom_id": "request-2", "method": "POST", "url": "/v2/rerank", "body": {"model": "BAAI/bge-reranker-v2-m3", "query": "What is the capital of France?", "documents": ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]}}"""
4141

42+
INPUT_REASONING_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "Qwen/Qwen3-0.6B", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Solve this math problem: 2+2=?"}]}}
43+
{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "Qwen/Qwen3-0.6B", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "What is the capital of France?"}]}}"""
44+
4245

4346
def test_empty_file():
4447
with (
@@ -188,3 +191,50 @@ def test_score(input_batch):
188191
line_dict = json.loads(line)
189192
assert isinstance(line_dict, dict)
190193
assert line_dict["error"] is None
194+
195+
196+
def test_reasoning_parser():
    """
    Test that the ``--reasoning-parser`` parameter works correctly in
    ``vllm run-batch``.

    Runs a small batch of chat-completion requests through the CLI with
    the ``qwen3`` reasoning parser enabled, then verifies that every
    output line is schema-valid, error-free, and carries a non-empty
    ``reasoning_content`` field.
    """
    with (
        tempfile.NamedTemporaryFile("w") as input_file,
        tempfile.NamedTemporaryFile("r") as output_file,
    ):
        input_file.write(INPUT_REASONING_BATCH)
        # Flush so the subprocess sees the full input on disk.
        input_file.flush()
        proc = subprocess.Popen(
            [
                "vllm",
                "run-batch",
                "-i",
                input_file.name,
                "-o",
                output_file.name,
                "--model",
                "Qwen/Qwen3-0.6B",
                "--reasoning-parser",
                "qwen3",
            ],
        )
        # communicate() already waits for the process to terminate,
        # so an extra proc.wait() is unnecessary.
        proc.communicate()
        assert proc.returncode == 0, f"{proc=}"

        contents = output_file.read()
        for line in contents.strip().split("\n"):
            # Ensure that the output format conforms to the openai api.
            # Validation should throw if the schema is wrong.
            BatchRequestOutput.model_validate_json(line)

            # Ensure that there is no error in the response.
            line_dict = json.loads(line)
            assert isinstance(line_dict, dict)
            assert line_dict["error"] is None

            # Check that reasoning_content is present and not empty
            reasoning_content = line_dict["response"]["body"]["choices"][0]["message"][
                "reasoning_content"
            ]
            assert reasoning_content is not None
            assert len(reasoning_content) > 0

vllm/entrypoints/openai/run_batch.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
3232
from vllm.entrypoints.openai.serving_score import ServingScores
3333
from vllm.logger import init_logger
34+
from vllm.reasoning import ReasoningParserManager
3435
from vllm.utils import FlexibleArgumentParser, random_uuid
3536
from vllm.version import __version__ as VLLM_VERSION
3637

@@ -331,6 +332,17 @@ async def run_request(
331332
return batch_output
332333

333334

335+
def validate_run_batch_args(args):
    """Validate run-batch CLI arguments before the engine is built.

    Currently checks that ``--reasoning-parser``, when supplied, names a
    parser registered with :class:`ReasoningParserManager`, so that a bad
    value fails fast instead of after engine startup.

    Args:
        args: Parsed CLI namespace; ``args.structured_outputs_config
            .reasoning_parser`` is the value under validation.

    Raises:
        KeyError: If the requested reasoning parser is not registered.
    """
    valid_reasoning_parsers = ReasoningParserManager.reasoning_parsers.keys()
    if (
        reasoning_parser := args.structured_outputs_config.reasoning_parser
    ) and reasoning_parser not in valid_reasoning_parsers:
        raise KeyError(
            f"invalid reasoning parser: {reasoning_parser} "
            f"(choose from {{ {','.join(valid_reasoning_parsers)} }})"
        )
344+
345+
334346
async def run_batch(
335347
engine_client: EngineClient,
336348
args: Namespace,
@@ -359,6 +371,7 @@ async def run_batch(
359371
base_model_paths=base_model_paths,
360372
lora_modules=None,
361373
)
374+
362375
openai_serving_chat = (
363376
OpenAIServingChat(
364377
engine_client,
@@ -367,12 +380,14 @@ async def run_batch(
367380
request_logger=request_logger,
368381
chat_template=None,
369382
chat_template_content_format="auto",
383+
reasoning_parser=args.structured_outputs_config.reasoning_parser,
370384
enable_prompt_tokens_details=args.enable_prompt_tokens_details,
371385
enable_force_include_usage=args.enable_force_include_usage,
372386
)
373387
if "generate" in supported_tasks
374388
else None
375389
)
390+
376391
openai_serving_embedding = (
377392
OpenAIServingEmbedding(
378393
engine_client,
@@ -504,6 +519,8 @@ async def main(args: Namespace):
504519
from vllm.entrypoints.openai.api_server import build_async_engine_client
505520
from vllm.usage.usage_lib import UsageContext
506521

522+
validate_run_batch_args(args)
523+
507524
async with build_async_engine_client(
508525
args,
509526
usage_context=UsageContext.OPENAI_BATCH_RUNNER,

0 commit comments

Comments
 (0)