@@ -149,6 +149,7 @@ def _validate_structured_output(self, params: SamplingParams) -> None:
149149 "xgrammar" , "xgrammar:disable-any-whitespace" , "guidance" ,
150150 "guidance:disable-any-whitespace" , "auto"
151151 ]
152+
152153 engine_level_backend = self .decoding_config .guided_decoding_backend
153154 if engine_level_backend not in supported_backends :
154155 raise ValueError (f"Only { supported_backends } structured output is "
@@ -169,8 +170,15 @@ def _validate_structured_output(self, params: SamplingParams) -> None:
169170 if engine_level_backend .startswith ("xgrammar" ):
170171 # xgrammar with no fallback
171172 validate_xgrammar_grammar (params )
172- params .guided_decoding .backend = engine_level_backend
173- elif engine_level_backend == "auto" :
173+ elif engine_level_backend .startswith ("guidance" ):
174+ # TODO: ideally we would have the LLTokenizer here as Lark syntax
175+ # allows <|special_token|> and similar, see
176+ # https://github.com/guidance-ai/llguidance/blob/main/docs/syntax.md#special-tokens
177+ # Without tokenizer these are disallowed in grammars.
178+ validate_guidance_grammar (params , tokenizer = None )
179+ else :
180+ # NOTE: engine_level_backend must be "auto" here, because we have
181+ # checked supported_backends above.
174182 # "auto" is an opt-in to opinionated behavior where we try to
175183 # choose a backend based on request contents. This is not the
176184 # default as it is less predictable and subject to change
@@ -183,14 +191,6 @@ def _validate_structured_output(self, params: SamplingParams) -> None:
183191 # are not supported in xgrammar. Fall back to guidance.
184192 params .guided_decoding .backend = "guidance"
185193
186- if engine_level_backend .startswith ("guidance" ):
187- # TODO ideally we would have the LLTokenizer here as Lark syntax
188- # allows <|special_token|> and similar, see
189- # https://github.com/guidance-ai/llguidance/blob/main/docs/syntax.md#special-tokens
190- # Without tokenizer these are disallowed in grammars.
191- validate_guidance_grammar (params , tokenizer = None )
192- params .guided_decoding .backend = engine_level_backend
193-
194194 def process_inputs (
195195 self ,
196196 request_id : str ,
0 commit comments