From 70bc1b0d668e9fc9a1eedfd57c2139a02629a47c Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Thu, 27 Mar 2025 03:14:29 +0000 Subject: [PATCH 01/11] [Bugfix] Structured output supports Enum. Signed-off-by: chaunceyjiang --- vllm/v1/structured_output/utils.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/vllm/v1/structured_output/utils.py b/vllm/v1/structured_output/utils.py index 694e46f763f0..a771256ef29f 100644 --- a/vllm/v1/structured_output/utils.py +++ b/vllm/v1/structured_output/utils.py @@ -26,10 +26,6 @@ def check_object(obj: dict[str, Any]) -> bool: if "pattern" in obj: return True - # Check for enum restrictions - if "enum" in obj: - return True - # Check for numeric ranges if obj.get("type") in ("integer", "number") and any( key in obj From 4246774aa030eb35156e42dc3049bc9fdf06685d Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Thu, 27 Mar 2025 09:34:39 +0000 Subject: [PATCH 02/11] [Bugfix] Structured output supports Enum. Signed-off-by: chaunceyjiang --- .../openai/test_structured_output.py | 84 +++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 tests/entrypoints/openai/test_structured_output.py diff --git a/tests/entrypoints/openai/test_structured_output.py b/tests/entrypoints/openai/test_structured_output.py new file mode 100644 index 000000000000..ef35f82f107e --- /dev/null +++ b/tests/entrypoints/openai/test_structured_output.py @@ -0,0 +1,84 @@ +# SPDX-License-Identifier: Apache-2.0 + +from typing import NamedTuple +from pydantic import BaseModel, ValidationError +import openai # use the official client for correctness check +import pytest +from enum import Enum +import pytest_asyncio + +from ...utils import RemoteOpenAIServer + +# # any model with a chat template should work here +MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct" + +@pytest.fixture(scope="module") +def server(): + args = [ + # use half precision for speed and memory savings in CI environment + "--dtype", + "float16", + ] + + with RemoteOpenAIServer(MODEL_NAME, args) as remote_server: + yield remote_server + + +@pytest_asyncio.fixture +async def client(server): + async with server.get_async_client() as async_client: + yield async_client + + +class CarType(str, Enum): + sedan = "sedan" + suv = "SUV" + truck = "Truck" + coupe = "Coupe" + + +class CarDescription(BaseModel): + brand: str + model: str + car_type: CarType + + +class TestCase(NamedTuple): + model_name: str + structured: bool + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "test_case", + [ + TestCase(model_name=MODEL_NAME, structured=True), + TestCase(model_name=MODEL_NAME, structured=False) + ], +) +async def test_structured_output_with_enum(client: openai.AsyncOpenAI, + test_case: TestCase): + prompt = ("Generate a JSON with the brand, model and car_type of" + "the most iconic car from the 90's") + extra_body = None + if test_case.structured: + json_schema = CarDescription.model_json_schema() + extra_body = {"guided_json": json_schema} + chat_completion = await client.chat.completions.create( + model=test_case.model_name, + messages=[{ + "role": "user", + "content": prompt, + }], + extra_body=extra_body) + assert chat_completion.id is not None + + choice = chat_completion.choices[0] + assert choice.finish_reason == "stop" + message = choice.message + if test_case.structured: + CarDescription.model_validate_json(message.content) + else: + with pytest.raises(ValidationError): + CarDescription.model_validate_json(message.content) + assert message.role == "assistant" From 1bfbb277a2799cf28a353954e5a2b3b4c95fe453 Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Thu, 27 Mar 2025 09:57:47 +0000 Subject: [PATCH 03/11] [Bugfix] Structured output supports Enum. Signed-off-by: chaunceyjiang --- tests/entrypoints/openai/test_structured_output.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/entrypoints/openai/test_structured_output.py b/tests/entrypoints/openai/test_structured_output.py index ef35f82f107e..9eb42e44b2d8 100644 --- a/tests/entrypoints/openai/test_structured_output.py +++ b/tests/entrypoints/openai/test_structured_output.py @@ -1,17 +1,16 @@ # SPDX-License-Identifier: Apache-2.0 - +from enum import Enum from typing import NamedTuple -from pydantic import BaseModel, ValidationError import openai # use the official client for correctness check import pytest -from enum import Enum import pytest_asyncio from ...utils import RemoteOpenAIServer - +from pydantic import BaseModel, ValidationError # # any model with a chat template should work here MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct" + @pytest.fixture(scope="module") def server(): args = [ From 794f3365197b01ef01ef6f96a7835e50d3441189 Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Thu, 27 Mar 2025 10:25:06 +0000 Subject: [PATCH 04/11] [Bugfix] Structured output supports Enum. Signed-off-by: chaunceyjiang --- tests/entrypoints/openai/test_structured_output.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/entrypoints/openai/test_structured_output.py b/tests/entrypoints/openai/test_structured_output.py index 9eb42e44b2d8..d2c4ffea13a8 100644 --- a/tests/entrypoints/openai/test_structured_output.py +++ b/tests/entrypoints/openai/test_structured_output.py @@ -4,9 +4,10 @@ import openai # use the official client for correctness check import pytest import pytest_asyncio +from pydantic import BaseModel, ValidationError from ...utils import RemoteOpenAIServer -from pydantic import BaseModel, ValidationError + # # any model with a chat template should work here MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct" From 207ac7ad4151809c3e92884970b8af4e7c5456de Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Thu, 27 Mar 2025 10:32:24 +0000 Subject: [PATCH 05/11] [Bugfix] Structured output supports Enum. Signed-off-by: chaunceyjiang --- tests/entrypoints/openai/test_structured_output.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/entrypoints/openai/test_structured_output.py b/tests/entrypoints/openai/test_structured_output.py index d2c4ffea13a8..52940b7380f8 100644 --- a/tests/entrypoints/openai/test_structured_output.py +++ b/tests/entrypoints/openai/test_structured_output.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 from enum import Enum from typing import NamedTuple + import openai # use the official client for correctness check import pytest import pytest_asyncio From 87a1f23db4e913589c5b42795507d675e5f6617a Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Fri, 28 Mar 2025 09:14:30 +0000 Subject: [PATCH 06/11] [Bugfix][v1] xgrammar structured output supports Enum. Signed-off-by: chaunceyjiang --- .../llm/test_struct_output_generate.py | 56 ++++++++++++++++++- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/tests/v1/entrypoints/llm/test_struct_output_generate.py b/tests/v1/entrypoints/llm/test_struct_output_generate.py index 6bdfa0fae4a2..c64f09b62544 100644 --- a/tests/v1/entrypoints/llm/test_struct_output_generate.py +++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py @@ -1,13 +1,14 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations - +from enum import Enum import json import re from typing import Any import jsonschema import pytest +from pydantic import BaseModel from vllm.entrypoints.llm import LLM from vllm.outputs import RequestOutput @@ -15,7 +16,7 @@ GUIDED_DECODING_BACKENDS_V1 = ["xgrammar", "guidance"] MODELS_TO_TEST = [ - "Qwen/Qwen2.5-1.5B-Instruct", "mistralai/Ministral-8B-Instruct-2410" + "Qwen/Qwen2.5-1.5B-Instruct", ] @@ -390,3 +391,54 @@ def test_guided_choice_completion( assert generated_text is not None assert generated_text in sample_guided_choice print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") + + +class CarType(str, Enum): + sedan = "sedan" + suv = "SUV" + truck = "Truck" + coupe = "Coupe" + + +class CarDescription(BaseModel): + brand: str + model: str + car_type: CarType + + +@pytest.mark.skip_global_cleanup +@pytest.mark.parametrize("guided_decoding_backend", + GUIDED_DECODING_BACKENDS_V1) +@pytest.mark.parametrize("model_name", MODELS_TO_TEST) +def test_guided_json_completion_with_enum( + monkeypatch: pytest.MonkeyPatch, + guided_decoding_backend: str, + model_name: str, +): + monkeypatch.setenv("VLLM_USE_V1", "1") + llm = LLM(model=model_name, + max_model_len=1024, + guided_decoding_backend=guided_decoding_backend) + json_schema = CarDescription.model_json_schema() + sampling_params = SamplingParams( + temperature=1.0, + max_tokens=1000, + guided_decoding=GuidedDecodingParams(json=json_schema)) + outputs = llm.generate( + prompt="Generate a JSON with the brand, model and car_type of" + "the most iconic car from the 90's", + sampling_params=sampling_params, + use_tqdm=True) + + assert outputs is not None + + for output in outputs: + assert output is not None + assert isinstance(output, RequestOutput) + prompt = output.prompt + + generated_text = output.outputs[0].text + assert generated_text is not None + print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") + output_json = json.loads(generated_text) + jsonschema.validate(instance=output_json, schema=json_schema) From 5a246ddbd7e702682249cf0dbe371e636f34fe4a Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Fri, 28 Mar 2025 09:15:18 +0000 Subject: [PATCH 07/11] [Bugfix][v1] xgrammar structured output supports Enum. Signed-off-by: chaunceyjiang --- .../openai/test_structured_output.py | 85 ------------------- 1 file changed, 85 deletions(-) delete mode 100644 tests/entrypoints/openai/test_structured_output.py diff --git a/tests/entrypoints/openai/test_structured_output.py b/tests/entrypoints/openai/test_structured_output.py deleted file mode 100644 index 52940b7380f8..000000000000 --- a/tests/entrypoints/openai/test_structured_output.py +++ /dev/null @@ -1,85 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -from enum import Enum -from typing import NamedTuple - -import openai # use the official client for correctness check -import pytest -import pytest_asyncio -from pydantic import BaseModel, ValidationError - -from ...utils import RemoteOpenAIServer - -# # any model with a chat template should work here -MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct" - - -@pytest.fixture(scope="module") -def server(): - args = [ - # use half precision for speed and memory savings in CI environment - "--dtype", - "float16", - ] - - with RemoteOpenAIServer(MODEL_NAME, args) as remote_server: - yield remote_server - - -@pytest_asyncio.fixture -async def client(server): - async with server.get_async_client() as async_client: - yield async_client - - -class CarType(str, Enum): - sedan = "sedan" - suv = "SUV" - truck = "Truck" - coupe = "Coupe" - - -class CarDescription(BaseModel): - brand: str - model: str - car_type: CarType - - -class TestCase(NamedTuple): - model_name: str - structured: bool - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "test_case", - [ - TestCase(model_name=MODEL_NAME, structured=True), - TestCase(model_name=MODEL_NAME, structured=False) - ], -) -async def test_structured_output_with_enum(client: openai.AsyncOpenAI, - test_case: TestCase): - prompt = ("Generate a JSON with the brand, model and car_type of" - "the most iconic car from the 90's") - extra_body = None - if test_case.structured: - json_schema = CarDescription.model_json_schema() - extra_body = {"guided_json": json_schema} - chat_completion = await client.chat.completions.create( - model=test_case.model_name, - messages=[{ - "role": "user", - "content": prompt, - }], - extra_body=extra_body) - assert chat_completion.id is not None - - choice = chat_completion.choices[0] - assert choice.finish_reason == "stop" - message = choice.message - if test_case.structured: - CarDescription.model_validate_json(message.content) - else: - with pytest.raises(ValidationError): - CarDescription.model_validate_json(message.content) - assert message.role == "assistant" From 45ab5b30f2f84a8c69c78fa8fb4dedce0155bef5 Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Fri, 28 Mar 2025 09:15:52 +0000 Subject: [PATCH 08/11] [Bugfix][v1] xgrammar structured output supports Enum. Signed-off-by: chaunceyjiang --- tests/v1/entrypoints/llm/test_struct_output_generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/v1/entrypoints/llm/test_struct_output_generate.py b/tests/v1/entrypoints/llm/test_struct_output_generate.py index c64f09b62544..46d83b3ed2d7 100644 --- a/tests/v1/entrypoints/llm/test_struct_output_generate.py +++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py @@ -16,7 +16,7 @@ GUIDED_DECODING_BACKENDS_V1 = ["xgrammar", "guidance"] MODELS_TO_TEST = [ - "Qwen/Qwen2.5-1.5B-Instruct", + "Qwen/Qwen2.5-1.5B-Instruct", "mistralai/Ministral-8B-Instruct-2410" ] From 89f8526c0dbf82366ac3581b09102d069b5667aa Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Fri, 28 Mar 2025 09:17:57 +0000 Subject: [PATCH 09/11] [Bugfix][v1] xgrammar structured output supports Enum. Signed-off-by: chaunceyjiang --- tests/v1/entrypoints/llm/test_struct_output_generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/v1/entrypoints/llm/test_struct_output_generate.py b/tests/v1/entrypoints/llm/test_struct_output_generate.py index 46d83b3ed2d7..a327856c1c40 100644 --- a/tests/v1/entrypoints/llm/test_struct_output_generate.py +++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py @@ -425,7 +425,7 @@ def test_guided_json_completion_with_enum( max_tokens=1000, guided_decoding=GuidedDecodingParams(json=json_schema)) outputs = llm.generate( - prompt="Generate a JSON with the brand, model and car_type of" + prompts="Generate a JSON with the brand, model and car_type of" "the most iconic car from the 90's", sampling_params=sampling_params, use_tqdm=True) From f33eda4e3ae036d3d5737eb53a10dfcc2b2d777b Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Fri, 28 Mar 2025 09:24:28 +0000 Subject: [PATCH 10/11] [Bugfix][v1] xgrammar structured output supports Enum. Signed-off-by: chaunceyjiang --- tests/v1/entrypoints/llm/test_struct_output_generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/v1/entrypoints/llm/test_struct_output_generate.py b/tests/v1/entrypoints/llm/test_struct_output_generate.py index a327856c1c40..038a9dc8f383 100644 --- a/tests/v1/entrypoints/llm/test_struct_output_generate.py +++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py @@ -1,9 +1,9 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations -from enum import Enum import json import re +from enum import Enum from typing import Any import jsonschema From 61fa2f3024faec6da094a9158c74e51f1a970bc7 Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Fri, 28 Mar 2025 09:31:24 +0000 Subject: [PATCH 11/11] [Bugfix][v1] xgrammar structured output supports Enum. Signed-off-by: chaunceyjiang --- tests/v1/entrypoints/llm/test_struct_output_generate.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/v1/entrypoints/llm/test_struct_output_generate.py b/tests/v1/entrypoints/llm/test_struct_output_generate.py index 038a9dc8f383..00fa47575b6a 100644 --- a/tests/v1/entrypoints/llm/test_struct_output_generate.py +++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations + import json import re from enum import Enum