 # any model with a chat template should work here
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
 
-GUIDED_DECODING_BACKENDS = ["outlines", "lm-format-enforcer", "xgrammar"]
-
 
 @pytest.fixture(scope="module")
 def monkeypatch_module():
@@ -487,20 +485,9 @@ async def test_chat_completion_stream_options(client: openai.AsyncOpenAI,
     assert last_completion_tokens == 10
 
 
-# NOTE: Not sure why, but when I place this after `test_guided_regex_chat`
-# (i.e. using the same ordering as in the Completions API tests), the test
-# will fail on the second `guided_decoding_backend` even when I swap their order
-# (ref: https://github.com/vllm-project/vllm/pull/5526#issuecomment-2173772256)
 @pytest.mark.asyncio
-@pytest.mark.parametrize("guided_decoding_backend", GUIDED_DECODING_BACKENDS)
 async def test_guided_choice_chat(client: openai.AsyncOpenAI,
-                                  is_v1_server: bool,
-                                  guided_decoding_backend: str,
                                   sample_guided_choice):
-
-    if is_v1_server and guided_decoding_backend != 'xgrammar':
-        pytest.skip("Only xgrammar backend is supported with V1")
-
     messages = [{
         "role": "system",
         "content": "you are a helpful assistant"
@@ -515,8 +502,7 @@ async def test_guided_choice_chat(client: openai.AsyncOpenAI,
         messages=messages,
         max_completion_tokens=10,
         temperature=0.7,
-        extra_body=dict(guided_choice=sample_guided_choice,
-                        guided_decoding_backend=guided_decoding_backend))
+        extra_body=dict(guided_choice=sample_guided_choice))
     choice1 = chat_completion.choices[0].message.content
     assert choice1 in sample_guided_choice
 
@@ -530,22 +516,16 @@ async def test_guided_choice_chat(client: openai.AsyncOpenAI,
         messages=messages,
         max_completion_tokens=10,
         temperature=0.7,
-        extra_body=dict(guided_choice=sample_guided_choice,
-                        guided_decoding_backend=guided_decoding_backend))
+        extra_body=dict(guided_choice=sample_guided_choice))
     choice2 = chat_completion.choices[0].message.content
     assert choice2 in sample_guided_choice
     assert choice1 != choice2
 
 
 @pytest.mark.asyncio
-@pytest.mark.parametrize("guided_decoding_backend", GUIDED_DECODING_BACKENDS)
-async def test_guided_json_chat(client: openai.AsyncOpenAI, is_v1_server: bool,
-                                guided_decoding_backend: str,
+async def test_guided_json_chat(client: openai.AsyncOpenAI,
                                 sample_json_schema):
 
-    if is_v1_server:
-        pytest.skip("sample_json_schema has features unsupported in V1")
-
     messages = [{
         "role": "system",
         "content": "you are a helpful assistant"
@@ -560,8 +540,7 @@ async def test_guided_json_chat(client: openai.AsyncOpenAI, is_v1_server: bool,
         model=MODEL_NAME,
         messages=messages,
         max_completion_tokens=1000,
-        extra_body=dict(guided_json=sample_json_schema,
-                        guided_decoding_backend=guided_decoding_backend))
+        extra_body=dict(guided_json=sample_json_schema))
     message = chat_completion.choices[0].message
     assert message.content is not None
     json1 = json.loads(message.content)
@@ -578,8 +557,7 @@ async def test_guided_json_chat(client: openai.AsyncOpenAI, is_v1_server: bool,
         model=MODEL_NAME,
         messages=messages,
         max_completion_tokens=1000,
-        extra_body=dict(guided_json=sample_json_schema,
-                        guided_decoding_backend=guided_decoding_backend))
+        extra_body=dict(guided_json=sample_json_schema))
     message = chat_completion.choices[0].message
     assert message.content is not None
     json2 = json.loads(message.content)
@@ -589,13 +567,7 @@ async def test_guided_json_chat(client: openai.AsyncOpenAI, is_v1_server: bool,
 
 
 @pytest.mark.asyncio
-@pytest.mark.parametrize("guided_decoding_backend", GUIDED_DECODING_BACKENDS)
-async def test_guided_regex_chat(client: openai.AsyncOpenAI,
-                                 is_v1_server: bool,
-                                 guided_decoding_backend: str, sample_regex):
-
-    if is_v1_server and guided_decoding_backend != 'xgrammar':
-        pytest.skip("Only xgrammar backend is supported with V1")
+async def test_guided_regex_chat(client: openai.AsyncOpenAI, sample_regex):
 
     messages = [{
         "role": "system",
@@ -610,8 +582,7 @@ async def test_guided_regex_chat(client: openai.AsyncOpenAI,
         model=MODEL_NAME,
         messages=messages,
         max_completion_tokens=20,
-        extra_body=dict(guided_regex=sample_regex,
-                        guided_decoding_backend=guided_decoding_backend))
+        extra_body=dict(guided_regex=sample_regex))
     ip1 = chat_completion.choices[0].message.content
     assert ip1 is not None
     assert re.fullmatch(sample_regex, ip1) is not None
@@ -622,8 +593,7 @@ async def test_guided_regex_chat(client: openai.AsyncOpenAI,
         model=MODEL_NAME,
         messages=messages,
         max_completion_tokens=20,
-        extra_body=dict(guided_regex=sample_regex,
-                        guided_decoding_backend=guided_decoding_backend))
+        extra_body=dict(guided_regex=sample_regex))
     ip2 = chat_completion.choices[0].message.content
     assert ip2 is not None
     assert re.fullmatch(sample_regex, ip2) is not None
@@ -652,15 +622,9 @@ async def test_guided_decoding_type_error(client: openai.AsyncOpenAI):
 
 
 @pytest.mark.asyncio
-@pytest.mark.parametrize("guided_decoding_backend", GUIDED_DECODING_BACKENDS)
 async def test_guided_choice_chat_logprobs(client: openai.AsyncOpenAI,
-                                           is_v1_server: bool,
-                                           guided_decoding_backend: str,
                                            sample_guided_choice):
 
-    if is_v1_server and guided_decoding_backend != 'xgrammar':
-        pytest.skip("Only xgrammar backend is supported with V1")
-
     messages = [{
         "role": "system",
         "content": "you are a helpful assistant"
@@ -676,8 +640,7 @@ async def test_guided_choice_chat_logprobs(client: openai.AsyncOpenAI,
         max_completion_tokens=10,
         logprobs=True,
         top_logprobs=5,
-        extra_body=dict(guided_choice=sample_guided_choice,
-                        guided_decoding_backend=guided_decoding_backend))
+        extra_body=dict(guided_choice=sample_guided_choice))
 
     assert chat_completion.choices[0].logprobs is not None
     assert chat_completion.choices[0].logprobs.content is not None
@@ -689,14 +652,7 @@ async def test_guided_choice_chat_logprobs(client: openai.AsyncOpenAI,
 
 
 @pytest.mark.asyncio
-@pytest.mark.parametrize("guided_decoding_backend", GUIDED_DECODING_BACKENDS)
-async def test_named_tool_use(client: openai.AsyncOpenAI, is_v1_server: bool,
-                              guided_decoding_backend: str,
-                              sample_json_schema):
-
-    if is_v1_server:
-        pytest.skip("sample_json_schema has features unsupported on V1")
-
+async def test_named_tool_use(client: openai.AsyncOpenAI, sample_json_schema):
     messages = [{
         "role": "system",
         "content": "you are a helpful assistant"
@@ -728,7 +684,7 @@ async def test_named_tool_use(client: openai.AsyncOpenAI, is_v1_server: bool,
                 "name": "dummy_function_name"
             }
         },
-        extra_body=dict(guided_decoding_backend=guided_decoding_backend))
+    )
     message = chat_completion.choices[0].message
     assert len(message.content) == 0
     json_string = message.tool_calls[0].function.arguments
@@ -763,7 +719,6 @@ async def test_named_tool_use(client: openai.AsyncOpenAI, is_v1_server: bool,
                 "name": "dummy_function_name"
             }
         },
-        extra_body=dict(guided_decoding_backend=guided_decoding_backend),
         stream=True)
 
     output = []
@@ -888,7 +843,6 @@ async def test_required_tool_use(client: openai.AsyncOpenAI,
         model=model_name,
         tools=tools,
         tool_choice="required",
-        extra_body=dict(guided_decoding_backend="outlines"),
     )
 
     assert chat_completion.choices[0].message.tool_calls is not None
@@ -900,7 +854,6 @@ async def test_required_tool_use(client: openai.AsyncOpenAI,
         model=model_name,
         tools=tools,
         tool_choice="required",
-        extra_body=dict(guided_decoding_backend="outlines"),
         stream=True,
     )
 
@@ -914,12 +867,7 @@ async def test_required_tool_use(client: openai.AsyncOpenAI,
 
 @pytest.mark.asyncio
 async def test_inconsistent_tool_choice_and_tools(client: openai.AsyncOpenAI,
-                                                  is_v1_server: bool,
                                                   sample_json_schema):
-
-    if is_v1_server:
-        pytest.skip("sample_json_schema has features unsupported on V1")
-
     messages = [{
         "role": "system",
         "content": "you are a helpful assistant"
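For reference, a minimal standalone sketch of the guided_choice request shape these tests now send: only the constraint goes in extra_body, with no per-request guided_decoding_backend. The base URL, API key, prompt, and choice list below are illustrative assumptions, not values taken from this diff.

import asyncio

import openai


async def main() -> None:
    # assumes a vLLM OpenAI-compatible server is already running locally
    client = openai.AsyncOpenAI(base_url="http://localhost:8000/v1",
                                api_key="EMPTY")
    chat_completion = await client.chat.completions.create(
        model="HuggingFaceH4/zephyr-7b-beta",
        messages=[{
            "role": "user",
            "content": "Pick your favourite colour."
        }],
        max_completion_tokens=10,
        temperature=0.7,
        # only the constraint is passed; the backend is chosen server-side
        extra_body=dict(guided_choice=["red", "green", "blue"]))
    print(chat_completion.choices[0].message.content)


if __name__ == "__main__":
    asyncio.run(main())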