Merged
4 changes: 4 additions & 0 deletions tests/entrypoints/openai/test_openai_schema.py
@@ -95,6 +95,10 @@ def test_openapi_stateless(case: schemathesis.Case):
case.operation.method.upper(),
case.operation.path,
)
if case.operation.path.startswith("/v1/responses"):
        # Skip the responses API, as it is meant to be stateful.
return

timeout = {
# requires a longer timeout
("POST", "/v1/chat/completions"):
Empty file.
32 changes: 32 additions & 0 deletions tests/v1/entrypoints/openai/responses/conftest.py
@@ -0,0 +1,32 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
import pytest_asyncio

from tests.utils import RemoteOpenAIServer

# Use a small reasoning model to test the responses API.
MODEL_NAME = "Qwen/Qwen3-0.6B"


@pytest.fixture(scope="module")
def default_server_args():
return [
"--max-model-len",
"8192",
"--enforce-eager", # For faster startup.
"--reasoning-parser",
"deepseek_r1",
Review comment (Member): Why do we need deepseek_r1 here?
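(A likely reason, not stated in this thread: Qwen3 formats its reasoning as <think>...</think>, which the deepseek_r1 reasoning parser also extracts, so no model-specific parser is needed.)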

]


@pytest.fixture(scope="module")
def server(default_server_args):
with RemoteOpenAIServer(MODEL_NAME, default_server_args) as remote_server:
yield remote_server


@pytest_asyncio.fixture
async def client(server):
async with server.get_async_client() as async_client:
yield async_client
75 changes: 75 additions & 0 deletions tests/v1/entrypoints/openai/responses/test_basic.py
@@ -0,0 +1,75 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import openai # use the official client for correctness check
import pytest


@pytest.mark.asyncio
async def test_simple_input(client: openai.AsyncOpenAI):
response = await client.responses.create(input="What is 13 * 24?")
print(response)

outputs = response.output
    # The output should contain the answer.
assert outputs[-1].type == "message"
assert "312" in outputs[-1].content[0].text

    # The output should contain the reasoning.
assert outputs[0].type == "reasoning"
assert outputs[0].text != ""


@pytest.mark.asyncio
async def test_instructions(client: openai.AsyncOpenAI):
response = await client.responses.create(
instructions="Finish the answer with QED.",
input="What is 13 * 24?",
)
print(response)

output_text = response.output[-1].content[0].text
assert "312" in output_text
assert "QED" in output_text


@pytest.mark.asyncio
async def test_chat(client: openai.AsyncOpenAI):
response = await client.responses.create(input=[
{
"role": "system",
"content": "Finish the answer with QED."
},
{
"role": "user",
"content": "What is 5 * 3?"
},
{
"role": "assistant",
"content": "15. QED."
},
{
"role": "user",
"content": "Multiply the result by 2."
},
], )
print(response)

output_text = response.output[-1].content[0].text
assert "30" in output_text
assert "QED" in output_text


@pytest.mark.asyncio
async def test_chat_with_input_type(client: openai.AsyncOpenAI):
response = await client.responses.create(input=[
{
"role": "user",
"content": [{
"type": "input_text",
"text": "Hello!"
}],
},
], )
print(response)
assert response.status == "completed"
137 changes: 137 additions & 0 deletions tests/v1/entrypoints/openai/responses/test_stateful.py
@@ -0,0 +1,137 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import asyncio

import openai
import pytest


@pytest.mark.asyncio
async def test_store(client: openai.AsyncOpenAI):
# By default, store is True.
response = await client.responses.create(input="Hello!")
assert response.status == "completed"

# Retrieve the response.
response = await client.responses.retrieve(response.id)
assert response.status == "completed"

# Test store=False.
response = await client.responses.create(
input="Hello!",
store=False,
)
assert response.status == "completed"

# The response should not be found.
with pytest.raises(openai.NotFoundError,
match="Response with id .* not found."):
await client.responses.retrieve(response.id)


@pytest.mark.asyncio
async def test_background(client: openai.AsyncOpenAI):
    # NOTE: This query should be easy enough for the model to answer
    # within 10 seconds.
response = await client.responses.create(
input="Hello!",
background=True,
)
assert response.status == "queued"

max_retries = 10
for _ in range(max_retries):
await asyncio.sleep(1)
response = await client.responses.retrieve(response.id)
if response.status != "queued":
break
print(response)

assert response.status == "completed"


@pytest.mark.asyncio
async def test_background_error(client: openai.AsyncOpenAI):
with pytest.raises(
openai.BadRequestError,
match="background can only be used when `store` is true"):
_ = await client.responses.create(
input="What is 13 * 24?",
background=True,
store=False,
)


@pytest.mark.asyncio
async def test_background_cancel(client: openai.AsyncOpenAI):
response = await client.responses.create(
input="Write a long story about a cat.",
background=True,
)
assert response.status == "queued"

# Cancel the response before it is completed.
# FIXME: This test can be flaky.
await asyncio.sleep(0.5)
response = await client.responses.cancel(response.id)
assert response.status == "cancelled"

# Make sure the response status remains unchanged.
await asyncio.sleep(5)
response = await client.responses.retrieve(response.id)
assert response.status == "cancelled"


@pytest.mark.asyncio
async def test_cancel_completed(client: openai.AsyncOpenAI):
response = await client.responses.create(input="Hello")
assert response.status == "completed"

with pytest.raises(openai.BadRequestError,
match="Cannot cancel a synchronous response."):
await client.responses.cancel(response.id)


@pytest.mark.asyncio
async def test_previous_response_id(client: openai.AsyncOpenAI):
response1 = await client.responses.create(
instructions="You are tested on your ability to retrieve the correct "
"information from the previous response.",
input="Hello, my name is John.")

response2 = await client.responses.create(
input="Actually, my name is not John. My real name is Mark.",
previous_response_id=response1.id,
)

response3 = await client.responses.create(
input="What is my real name again? Answer in one word.",
previous_response_id=response2.id,
)
print(response3)
assert "Mark" in response3.output[-1].content[0].text
assert "John" not in response3.output[-1].content[0].text


@pytest.mark.asyncio
async def test_two_responses_with_same_prev_id(client: openai.AsyncOpenAI):
response1 = await client.responses.create(
instructions="You are tested on your ability to retrieve the correct "
"information from the previous response.",
input="Hello, my name is John.")

    # Both responses 2 and 3 use response 1 as the previous response.
response2 = client.responses.create(
input="Actually, my name is not John. My name is Mark.",
previous_response_id=response1.id,
)
response3 = client.responses.create(
input="What is my name again? Answer in one word.",
previous_response_id=response1.id,
)

_ = await response2
response3_result = await response3
print(response3_result)
assert "John" in response3_result.output[-1].content[0].text
assert "Mark" not in response3_result.output[-1].content[0].text
92 changes: 92 additions & 0 deletions tests/v1/entrypoints/openai/responses/test_structured_output.py
@@ -0,0 +1,92 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import json

import openai
import pytest
from pydantic import BaseModel


@pytest.mark.asyncio
async def test_structured_output(client: openai.AsyncOpenAI):
response = await client.responses.create(
input=[
{
"role": "system",
"content": "Extract the event information."
},
{
"role": "user",
"content":
"Alice and Bob are going to a science fair on Friday.",
},
],
text={
"format": {
"type": "json_schema",
"name": "calendar_event",
"schema": {
"type": "object",
"properties": {
"event_name": {
"type": "string"
},
"date": {
"type": "string"
},
"participants": {
"type": "array",
"items": {
"type": "string"
}
},
},
"required": ["event_name", "date", "participants"],
"additionalProperties": False,
},
"description": "A calendar event.",
"strict": True,
}
},
)
print(response)

# NOTE: The JSON schema is applied to the output text, not reasoning.
output_text = response.output[-1].content[0].text
event = json.loads(output_text)

assert event["event_name"].lower() == "science fair"
assert event["date"] == "Friday"
participants = event["participants"]
assert len(participants) == 2
assert participants[0] == "Alice"
assert participants[1] == "Bob"


@pytest.mark.asyncio
async def test_structured_output_with_parse(client: openai.AsyncOpenAI):

class CalendarEvent(BaseModel):
event_name: str
date: str
participants: list[str]

response = await client.responses.parse(
model=None,
instructions="Extract the event information.",
input="Alice and Bob are going to a science fair on Friday.",
text_format=CalendarEvent,
)
print(response)

# The output is successfully parsed.
event = response.output_parsed
assert event is not None

# The output is correct.
assert event.event_name.lower() == "science fair"
assert event.date == "Friday"
participants = event.participants
assert len(participants) == 2
assert participants[0] == "Alice"
assert participants[1] == "Bob"
4 changes: 3 additions & 1 deletion vllm/entrypoints/chat_utils.py
@@ -902,6 +902,8 @@ def _get_full_multimodal_text_prompt(placeholder_counts: dict[str, int],
] = {
"text":
lambda part: _TextParser(part).get("text", None),
"input_text":
lambda part: _TextParser(part).get("text", None),
"image_url":
lambda part: _ImageParser(part).get("image_url", {}).get("url", None),
"image_embeds":
@@ -1040,7 +1042,7 @@ def _parse_chat_message_content_part(
"with empty / unparsable content.", part, part_type)
return None

if part_type in ("text", "refusal"):
if part_type in ("text", "input_text", "refusal"):
str_content = cast(str, content)
if wrap_dicts:
return {'type': 'text', 'text': str_content}
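
For illustration (not part of the diff): with this change, a Responses-style "input_text" part is normalized exactly like a Chat Completions "text" part. Below is a minimal, self-contained sketch of that behavior (the helper name is hypothetical; only the part-type names come from the hunk above):

# Hypothetical sketch mirroring the new branch in chat_utils.py:
# "text" and "input_text" parts reduce to the same wrapped form.
def normalize_text_part(part: dict) -> dict:
    if part["type"] in ("text", "input_text"):
        return {"type": "text", "text": part["text"]}
    raise ValueError(f"Unsupported part type: {part['type']}")

assert (normalize_text_part({"type": "input_text", "text": "Hello!"})
        == normalize_text_part({"type": "text", "text": "Hello!"}))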