Skip to content

Commit 901d705

Browse files
committed
[V1] Set structured output backend to auto by default
The previous default was `xgrammar` and users had to opt-in to fallback behavior. After more thought, `auto` seems like a better default as it lets us do our best to satisfy all requests. Users can still pin vllm to a single backend if desired. Make `auto` work for V0 in case it gets specified there, as well.

Signed-off-by: Russell Bryant <rbryant@redhat.com>
1 parent c3b5189 commit 901d705

File tree

3 files changed

+11
-5
lines changed

3 files changed

+11
-5
lines changed

vllm/config.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2888,7 +2888,7 @@ class DecodingConfig:
28882888

28892889
# Which guided decoding algo to use.
28902890
# 'outlines' / 'lm-format-enforcer' / 'xgrammar'
2891-
guided_decoding_backend: str = 'xgrammar'
2891+
guided_decoding_backend: str = "auto" if envs.VLLM_USE_V1 else "xgrammar"
28922892

28932893
reasoning_backend: Optional[str] = None
28942894

@@ -2913,7 +2913,7 @@ def compute_hash(self) -> str:
29132913

29142914
def __post_init__(self):
29152915
v0_valid_guided_backends = [
2916-
'outlines', 'lm-format-enforcer', 'xgrammar'
2916+
'outlines', 'lm-format-enforcer', 'xgrammar', 'auto'
29172917
]
29182918
v1_valid_guided_backends = ['xgrammar', 'guidance', 'auto']
29192919

vllm/engine/arg_utils.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -181,7 +181,7 @@ class EngineArgs:
181181
enable_chunked_prefill: Optional[bool] = None
182182
disable_chunked_mm_input: bool = False
183183

184-
guided_decoding_backend: str = 'xgrammar'
184+
guided_decoding_backend: str = DecodingConfig.guided_decoding_backend
185185
logits_processor_pattern: Optional[str] = None
186186

187187
speculative_config: Optional[Dict[str, Any]] = None
@@ -381,13 +381,13 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
381381
parser.add_argument(
382382
'--guided-decoding-backend',
383383
type=str,
384-
default='xgrammar',
384+
default=DecodingConfig.guided_decoding_backend,
385385
help='Which engine will be used for guided decoding'
386386
' (JSON schema / regex etc) by default. Currently support '
387387
'https://github.com/mlc-ai/xgrammar and '
388388
'https://github.com/guidance-ai/llguidance.'
389389
'Valid backend values are "xgrammar", "guidance", and "auto". '
390-
'With "auto", we will make opinionated choices based on request'
390+
'With "auto", we will make opinionated choices based on request '
391391
'contents and what the backend libraries currently support, so '
392392
'the behavior is subject to change in each release.')
393393
parser.add_argument(

vllm/model_executor/guided_decoding/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,12 @@ def fallback_or_error(guided_params: GuidedDecodingParams, message: str,
3333
logger.warning("%s Falling back to use %s instead.", message, fallback)
3434
guided_params.backend = fallback
3535

36+
# `auto` was added for V1 to explicitly declare a mode that has fallbacks
37+
# in place. If that is specified with V0, treat it as `xgrammar`, as we have
38+
# fallbacks enabled for that and it is the V0 default.
39+
if guided_params.backend == "auto":
40+
guided_params.backend = "xgrammar"
41+
3642
# lm-format-enforce doesn't support grammar, fallback to xgrammar
3743
if guided_params.backend_name == "lm-format-enforcer":
3844
if guided_params.grammar is not None:

0 commit comments

Comments (0)