1919 xgr_installed = False
2020 pass
2121
22+ from vllm .logger import init_logger
2223from vllm .model_executor .guided_decoding .utils import (convert_lark_to_gbnf ,
2324 grammar_is_likely_lark )
2425from vllm .transformers_utils .tokenizers .mistral import MistralTokenizer
2930 from vllm .config import ModelConfig
3031 from vllm .sampling_params import GuidedDecodingParams
3132
33+ logger = init_logger (__name__ )
34+
3235
3336# TODO: pass batch size to max threads here
3437def get_local_xgrammar_guided_decoding_logits_processor (
@@ -161,6 +164,7 @@ class GrammarConfig:
161164 json_str : str | None = None
162165 grammar_str : str | None = None
163166 json_object : bool | None = None
167+ any_whitespace : bool = True
164168 max_threads : int = 8
165169 tokenizer_data : TokenizerData | None = None
166170
@@ -180,19 +184,42 @@ def from_guided_params(cls,
180184 else :
181185 json_str = guided_params .json
182186
187+ any_whitespace = 'disable-any-whitespace' not in \
188+ guided_params .backend_options ()
189+
190+ # Log a hint when the model family has a history of runaway
191+ # whitespace generation with xgrammar and any-whitespace is enabled.
192+ # References:
193+ # https://github.com/vllm-project/vllm/pull/12744
194+ # https://github.com/mlc-ai/xgrammar/issues/212
195+ model_with_warn = None
196+
197+ if 'Mistral' in model_config .model :
198+ model_with_warn = 'Mistral'
199+ elif 'Qwen' in model_config .model :
200+ model_with_warn = 'Qwen'
201+
202+ if model_with_warn is not None and any_whitespace :
203+ msg = (f"{ model_with_warn } "
204+ f"model detected, consider set "
205+ f"`guided_backend=xgrammar:disable-any-whitespace` "
206+ f"to prevent runaway generation of whitespaces." )
207+ logger .info_once (msg )
183208 # Validate the schema and raise ValueError here if it is invalid.
184209 # This is to avoid exceptions in model execution, which will crash
185210 # the engine worker process.
186211 try :
187- xgr .Grammar .from_json_schema (json_str )
212+ xgr .Grammar .from_json_schema (json_str ,
213+ any_whitespace = any_whitespace )
188214 except RuntimeError as err :
189215 raise ValueError (str (err )) from err
190216
191217 return cls (json_str = json_str ,
192218 vocab_size = model_config .hf_text_config .vocab_size ,
193219 tokenizer_hash = tokenizer_hash ,
194220 max_threads = max_threads ,
195- tokenizer_data = tokenizer_data )
221+ tokenizer_data = tokenizer_data ,
222+ any_whitespace = any_whitespace )
196223 elif guided_params .grammar :
197224 # XGrammar only supports GBNF grammars, so we must convert Lark
198225 if grammar_is_likely_lark (guided_params .grammar ):
@@ -290,7 +317,10 @@ def _ensure_ctx(self):
290317 if self .ctx is None :
291318 compiler = GrammarCompilerCache .get_compiler (self .config )
292319 if self .config .json_str is not None :
293- self .ctx = compiler .compile_json_schema (self .config .json_str )
320+ any_whitespace = self .config .any_whitespace
321+ self .ctx = compiler \
322+ .compile_json_schema (self .config .json_str ,
323+ any_whitespace = any_whitespace )
294324 elif self .config .grammar_str is not None :
295325 self .ctx = compiler .compile_grammar (self .config .grammar_str )
296326 elif self .config .json_object :
0 commit comments