
Commit 81aadb6

chore: migrate tokenizer init to manager only
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
1 parent: dc1b4a6

File tree

4 files changed (+54, −44 lines)

vllm/v1/structured_output/__init__.py

Lines changed: 20 additions & 5 deletions

```diff
@@ -7,9 +7,11 @@
 
 from vllm.config import VllmConfig
 from vllm.logger import init_logger
+from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs
 from vllm.v1.structured_output.backend_guidance import GuidanceBackend
 from vllm.v1.structured_output.backend_types import (StructuredOutputBackend,
                                                      StructuredOutputGrammar)
+from vllm.v1.structured_output.backend_xgrammar import XgrammarBackend
 
 if TYPE_CHECKING:
     import numpy as np
@@ -46,13 +48,26 @@ def grammar_init(self, request: Request) -> None:
         # backends on a per-request basis in V1 (for now, anyway...).
         if self.backend is None:
             backend_name = request.sampling_params.guided_decoding.backend_name
+            tokenizer_group = init_tokenizer_from_configs(
+                model_config=self.vllm_config.model_config,
+                scheduler_config=self.vllm_config.scheduler_config,
+                parallel_config=self.vllm_config.parallel_config,
+                lora_config=self.vllm_config.lora_config)
+            tokenizer_group.ping()
+            tokenizer = tokenizer_group.get_lora_tokenizer(None)
+            vocab_size = self.vllm_config.model_config.get_vocab_size()
             if backend_name == "xgrammar":
-                from vllm.v1.structured_output.backend_xgrammar import (
-                    XgrammarBackend)
-
-                self.backend = XgrammarBackend(self.vllm_config)
+                self.backend = XgrammarBackend(
+                    self.vllm_config,
+                    tokenizer=tokenizer,
+                    vocab_size=vocab_size,
+                )
             elif backend_name == "guidance":
-                self.backend = GuidanceBackend(self.vllm_config)
+                self.backend = GuidanceBackend(
+                    self.vllm_config,
+                    tokenizer=tokenizer,
+                    vocab_size=vocab_size,
+                )
             else:
                 raise ValueError(
                     f"Unsupported structured output backend: {backend_name}")
```

vllm/v1/structured_output/backend_guidance.py

Lines changed: 6 additions & 15 deletions

```diff
@@ -1,15 +1,15 @@
 # SPDX-License-Identifier: Apache-2.0
 
+from __future__ import annotations
+
 import os
 from dataclasses import dataclass
 from typing import TYPE_CHECKING, Optional
 
 import torch
 
-from vllm.config import VllmConfig
 from vllm.logger import init_logger
 from vllm.sampling_params import SamplingParams
-from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs
 from vllm.utils import LazyLoader
 from vllm.v1.structured_output.backend_types import (StructuredOutputBackend,
                                                      StructuredOutputGrammar,
@@ -29,25 +29,16 @@
 logger = init_logger(__name__)
 
 
+@dataclass
 class GuidanceBackend(StructuredOutputBackend):
 
-    def __init__(self, vllm_config: VllmConfig):
-        self.vllm_config = vllm_config
-        tokenizer_group = init_tokenizer_from_configs(
-            model_config=vllm_config.model_config,
-            scheduler_config=vllm_config.scheduler_config,
-            parallel_config=vllm_config.parallel_config,
-            lora_config=vllm_config.lora_config)  # type: ignore[arg-type]
-        tokenizer_group.ping()
-        self.vllm_config = vllm_config
-        self.vocab_size = vllm_config.model_config.get_vocab_size()
+    def __post_init__(self):
         self.disable_any_whitespace = (
             "disable-any-whitespace"
-            in vllm_config.decoding_config.guided_decoding_backend)
+            in self.vllm_config.decoding_config.guided_decoding_backend)
 
-        tokenizer = tokenizer_group.get_lora_tokenizer(None)
         self.ll_tokenizer = llguidance_hf.from_tokenizer(
-            tokenizer, self.vocab_size)
+            self.tokenizer, self.vocab_size)
 
     def compile_grammar(self, request_type: StructuredOutputOptions,
                         grammar_spec: str) -> StructuredOutputGrammar:
```
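With the class converted to a dataclass, the generated __init__ assigns the shared fields (vllm_config, tokenizer, vocab_size) and __post_init__ only derives backend-local state from them. A small runnable sketch of that pattern (the Options class and its backend-spec string are toys; only the substring check mirrors the diff):

```python
from dataclasses import dataclass


@dataclass
class Options:
    guided_decoding_backend: str

    def __post_init__(self):
        # Runs after the generated __init__ has assigned the fields,
        # so derived attributes can be computed from them here.
        self.disable_any_whitespace = (
            "disable-any-whitespace" in self.guided_decoding_backend)


opts = Options("guidance:disable-any-whitespace")
assert opts.disable_any_whitespace is True
```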

vllm/v1/structured_output/backend_types.py

Lines changed: 13 additions & 0 deletions

```diff
@@ -1,10 +1,18 @@
 # SPDX-License-Identifier: Apache-2.0
 
+from __future__ import annotations
+
 import enum
 from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
 
 import torch
 
+if TYPE_CHECKING:
+    from vllm.config import VllmConfig
+    from vllm.transformers_utils.tokenizer import AnyTokenizer
+
 
 class StructuredOutputOptions(enum.Enum):
     JSON = enum.auto()
@@ -60,9 +68,14 @@ def reset(self):
         """
 
 
+@dataclass
 class StructuredOutputBackend(ABC):
     """Engine-level backend for structured output requests."""
 
+    vllm_config: VllmConfig
+    tokenizer: AnyTokenizer
+    vocab_size: int
+
     @abstractmethod
     def compile_grammar(self, request_type: StructuredOutputOptions,
                         grammar_spec: str) -> StructuredOutputGrammar:
```
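The base class is now both an ABC and a dataclass, so every backend inherits the three shared fields and only supplies the abstract behavior. A toy reconstruction under simplified types (object stands in for VllmConfig and AnyTokenizer; EchoBackend is hypothetical):

```python
from abc import ABC, abstractmethod
from dataclasses import dataclass


@dataclass
class Backend(ABC):
    # Shared fields: every subclass gets these via the generated __init__.
    vllm_config: object
    tokenizer: object
    vocab_size: int

    @abstractmethod
    def compile_grammar(self, grammar_spec: str): ...


@dataclass
class EchoBackend(Backend):
    def compile_grammar(self, grammar_spec: str):
        return f"compiled[{grammar_spec}] over {self.vocab_size} tokens"


backend = EchoBackend(vllm_config=None, tokenizer=None, vocab_size=8)
print(backend.compile_grammar("json"))
```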

vllm/v1/structured_output/backend_xgrammar.py

Lines changed: 15 additions & 24 deletions

```diff
@@ -1,14 +1,14 @@
 # SPDX-License-Identifier: Apache-2.0
 
+from __future__ import annotations
+
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING
 
 import torch
 
 import vllm.envs
-from vllm.config import VllmConfig
 from vllm.logger import init_logger
-from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs
 from vllm.transformers_utils.tokenizers.mistral import MistralTokenizer
 from vllm.utils import LazyLoader
 from vllm.v1.structured_output.backend_types import (StructuredOutputBackend,
@@ -23,58 +23,49 @@
 logger = init_logger(__name__)
 
 
+@dataclass
 class XgrammarBackend(StructuredOutputBackend):
 
-    def __init__(self, vllm_config: VllmConfig):
-        self.vllm_config = vllm_config
+    def __post_init__(self):
         self.disable_any_whitespace = (
             "disable-any-whitespace"
-            in vllm_config.decoding_config.guided_decoding_backend)
-        tokenizer_group = init_tokenizer_from_configs(
-            model_config=vllm_config.model_config,
-            scheduler_config=vllm_config.scheduler_config,
-            parallel_config=vllm_config.parallel_config,
-            lora_config=vllm_config.lora_config)  # type: ignore[arg-type]
-        tokenizer_group.ping()
-
-        tokenizer = tokenizer_group.get_lora_tokenizer(None)
-        self.vocab_size = vllm_config.model_config.get_vocab_size()
-        if isinstance(tokenizer, MistralTokenizer):
+            in self.vllm_config.decoding_config.guided_decoding_backend)
+        if isinstance(self.tokenizer, MistralTokenizer):
             # NOTE: ideally, xgrammar should handle this accordingly.
             # refer to https://github.com/mlc-ai/xgrammar/blob/d77c0a0173ef14779c918e3be7966ba852f7910f/python/xgrammar/tokenizer_info.py#L98
             try:
-                if tokenizer.is_tekken:
-                    encoded_vocab = tokenizer._vocab
+                if self.tokenizer.is_tekken:
+                    encoded_vocab = self.tokenizer._vocab
                 else:
                     encoded_vocab = [
                         token for token, _ in sorted(
-                            tokenizer.get_vocab().items(),
+                            self.tokenizer.get_vocab().items(),
                             key=lambda x: x[1],
                         )
                     ]
                 stop_token_ids = None
                 if hasattr(
-                        tokenizer,
+                        self.tokenizer,
                         "eos_token_id",
-                ) and tokenizer.eos_token_id is not None:
-                    stop_token_ids = [tokenizer.eos_token_id]
+                ) and self.tokenizer.eos_token_id is not None:
+                    stop_token_ids = [self.tokenizer.eos_token_id]
             except AttributeError as e:
                 raise ValueError(
                     f"Cannot get the vocabulary of the tokenizer "
-                    f"{type(tokenizer)}. The tokenizer should have a "
+                    f"{type(self.tokenizer)}. The tokenizer should have a "
                     "get_vocab method.") from e
             tokenizer_info = xgr.TokenizerInfo(  # type: ignore
                 encoded_vocab=encoded_vocab,
                 # NOTE: https://github.com/mlc-ai/xgrammar/blob/5e141f6ff1ca02bc31f9e512e68b61f2a8ae88e5/tests/python/test_tokenizer_info.py#L43  # noqa: E501
                 vocab_type=xgr.VocabType.RAW
-                if tokenizer.is_tekken else xgr.VocabType.BYTE_FALLBACK,
+                if self.tokenizer.is_tekken else xgr.VocabType.BYTE_FALLBACK,
                 vocab_size=self.vocab_size,
                 stop_token_ids=stop_token_ids,
                 add_prefix_space=True,
             )
         else:
             tokenizer_info = xgr.TokenizerInfo.from_huggingface(
-                tokenizer,
+                self.tokenizer,
                 vocab_size=self.vocab_size,
             )
         self.compiler = xgr.GrammarCompiler(
```
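For non-tekken Mistral tokenizers, the code above rebuilds the vocabulary as a list ordered by token id, which is the shape xgrammar's TokenizerInfo expects. A tiny self-contained illustration of that ordering (the three-token vocab is made up):

```python
# get_vocab() maps token -> id; xgrammar wants a list indexed by id.
vocab = {"<s>": 0, "world": 2, "hello": 1}
encoded_vocab = [
    token for token, _ in sorted(vocab.items(), key=lambda x: x[1])
]
assert encoded_vocab == ["<s>", "hello", "world"]
```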
