tests/v1/entrypoints/llm/test_struct_output_generate.py (23 additions, 10 deletions)
@@ -15,11 +15,20 @@
 from vllm.outputs import RequestOutput
 from vllm.sampling_params import GuidedDecodingParams, SamplingParams
 
-GUIDED_DECODING_BACKENDS_V1 = [
-    "xgrammar:disable-any-whitespace", "guidance:disable-any-whitespace"
-]
-MODELS_TO_TEST = [
-    "Qwen/Qwen2.5-1.5B-Instruct", "mistralai/Ministral-8B-Instruct-2410"
-]
+PARAMS_MODELS_BACKENDS_TOKENIZER_MODE = [
+    ("mistralai/Ministral-8B-Instruct-2410", "xgrammar:disable-any-whitespace",
+     "auto"),
+    ("mistralai/Ministral-8B-Instruct-2410", "guidance:disable-any-whitespace",
+     "auto"),
+    ("mistralai/Ministral-8B-Instruct-2410", "xgrammar:disable-any-whitespace",
+     "mistral"),
+    ("Qwen/Qwen2.5-1.5B-Instruct", "xgrammar:disable-any-whitespace", "auto"),
+    ("Qwen/Qwen2.5-1.5B-Instruct", "guidance:disable-any-whitespace", "auto"),
+]
+
+PARAMS_MODELS_TOKENIZER_MODE = [
+    ("mistralai/Ministral-8B-Instruct-2410", "auto"),
+    ("Qwen/Qwen2.5-1.5B-Instruct", "auto"),
+]
 
 
@@ -37,9 +46,8 @@ class CarDescription(BaseModel):
 
 
 @pytest.mark.skip_global_cleanup
-@pytest.mark.parametrize("guided_decoding_backend",
-                         GUIDED_DECODING_BACKENDS_V1)
-@pytest.mark.parametrize("model_name", MODELS_TO_TEST)
+@pytest.mark.parametrize("model_name, guided_decoding_backend, tokenizer_mode",
+                         PARAMS_MODELS_BACKENDS_TOKENIZER_MODE)
 def test_structured_output(
     monkeypatch: pytest.MonkeyPatch,
     sample_json_schema: dict[str, Any],
@@ -49,6 +57,7 @@ def test_structured_output(
     sample_regex: str,
     sample_guided_choice: str,
     guided_decoding_backend: str,
+    tokenizer_mode: str,
     model_name: str,
 ):
     monkeypatch.setenv("VLLM_USE_V1", "1")
@@ -58,7 +67,8 @@ def test_structured_output(
     llm = LLM(model=model_name,
               enforce_eager=True,
               max_model_len=1024,
-              guided_decoding_backend=guided_decoding_backend)
+              guided_decoding_backend=guided_decoding_backend,
+              tokenizer_mode=tokenizer_mode)
 
     #
     # Test 1: Generate JSON output based on a provided schema
@@ -324,17 +334,20 @@ def test_structured_output(
 
 
 @pytest.mark.skip_global_cleanup
-@pytest.mark.parametrize("model_name", MODELS_TO_TEST)
+@pytest.mark.parametrize("model_name, tokenizer_mode",
+                         PARAMS_MODELS_TOKENIZER_MODE)
 def test_structured_output_auto_mode(
     monkeypatch: pytest.MonkeyPatch,
     unsupported_json_schema: dict[str, Any],
     model_name: str,
+    tokenizer_mode: str,
 ):
     monkeypatch.setenv("VLLM_USE_V1", "1")
 
     llm = LLM(model=model_name,
               max_model_len=1024,
-              guided_decoding_backend="auto")
+              guided_decoding_backend="auto",
+              tokenizer_mode=tokenizer_mode)
 
     sampling_params = SamplingParams(
         temperature=1.0,
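Note on the test change: bundling model, guided decoding backend, and tokenizer mode into explicit tuples (rather than stacking independent parametrize decorators, which would generate the full cross product) keeps unsupported combinations such as Qwen with tokenizer_mode="mistral" out of the matrix. A minimal standalone sketch of the pattern, using hypothetical names that are not part of this PR:

```python
import pytest

# Hypothetical, self-contained illustration of the tuple-based
# parametrization adopted above: a single parametrize call binds all three
# arguments, so only the explicitly listed combinations become test cases.
CASES = [
    ("mistralai/Ministral-8B-Instruct-2410", "xgrammar:disable-any-whitespace",
     "auto"),
    ("mistralai/Ministral-8B-Instruct-2410", "xgrammar:disable-any-whitespace",
     "mistral"),
    ("Qwen/Qwen2.5-1.5B-Instruct", "guidance:disable-any-whitespace", "auto"),
]


@pytest.mark.parametrize("model_name, backend, tokenizer_mode", CASES)
def test_combination_is_listed(model_name: str, backend: str,
                               tokenizer_mode: str):
    # Each tuple in CASES yields exactly one test invocation; the "mistral"
    # tokenizer mode appears only for the model that supports it.
    assert tokenizer_mode in ("auto", "mistral")
```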
vllm/v1/structured_output/backend_xgrammar.py (11 additions, 7 deletions)
@@ -42,12 +42,15 @@ def __init__(self, vllm_config: VllmConfig):
         # NOTE: ideally, xgrammar should handle this accordingly.
         # refer to https://github.com/mlc-ai/xgrammar/blob/d77c0a0173ef14779c918e3be7966ba852f7910f/python/xgrammar/tokenizer_info.py#L98
         try:
-            encoded_vocab = [
-                token for token, _ in sorted(
-                    tokenizer.get_vocab().items(),
-                    key=lambda x: x[1],
-                )
-            ]
+            if tokenizer.is_tekken:
+                encoded_vocab = tokenizer._vocab
+            else:
+                encoded_vocab = [
+                    token for token, _ in sorted(
+                        tokenizer.get_vocab().items(),
+                        key=lambda x: x[1],
+                    )
+                ]
             stop_token_ids = None
             if hasattr(
                     tokenizer,
@@ -62,7 +65,8 @@ def __init__(self, vllm_config: VllmConfig):
             tokenizer_info = xgr.TokenizerInfo(  # type: ignore
                 encoded_vocab=encoded_vocab,
                 # NOTE: https://github.com/mlc-ai/xgrammar/blob/5e141f6ff1ca02bc31f9e512e68b61f2a8ae88e5/tests/python/test_tokenizer_info.py#L43  # noqa: E501
-                vocab_type=xgr.VocabType.BYTE_FALLBACK,
+                vocab_type=xgr.VocabType.RAW
+                if tokenizer.is_tekken else xgr.VocabType.BYTE_FALLBACK,
                 vocab_size=self.vocab_size,
                 stop_token_ids=stop_token_ids,
                 add_prefix_space=True,
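Note on the backend change: Mistral's "tekken" tokenizer stores its vocabulary as an id-ordered list of raw token strings rather than a byte-fallback token-to-id mapping, so the list can be handed to xgrammar as-is, with vocab_type flipped to RAW so xgrammar interprets those strings correctly when compiling grammars. A minimal sketch of the vocabulary-selection logic in isolation, assuming a tokenizer that exposes is_tekken and _vocab the way vLLM's Mistral tokenizer wrapper does; an illustration, not a drop-in replacement for the code above:

```python
from typing import Any


def build_encoded_vocab(tokenizer: Any) -> list[str]:
    """Return the vocabulary ordered by token id, mirroring the branch above.

    Tekken tokenizers already keep an id-ordered list of raw tokens, so it
    can be used directly; other tokenizers expose a token -> id mapping
    that must be sorted by id first.
    """
    if getattr(tokenizer, "is_tekken", False):
        # Assumption: _vocab is an id-ordered list, as in vLLM's
        # MistralTokenizer wrapper used by this backend.
        return tokenizer._vocab
    return [
        token for token, _ in sorted(tokenizer.get_vocab().items(),
                                     key=lambda x: x[1])
    ]
```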