 # SPDX-License-Identifier: Apache-2.0
+"""
+To run this example, you need to start the vLLM server:
+
+```bash
+vllm serve Qwen/Qwen2.5-3B-Instruct
+```
+"""
 
 from enum import Enum
 
 from openai import BadRequestError, OpenAI
 from pydantic import BaseModel
 
-client = OpenAI(
-    base_url="http://localhost:8000/v1",
-    api_key="-",
-)
 
 # Guided decoding by Choice (list of possible options)
-completion = client.chat.completions.create(
-    model="Qwen/Qwen2.5-3B-Instruct",
-    messages=[{
-        "role": "user",
-        "content": "Classify this sentiment: vLLM is wonderful!"
-    }],
-    extra_body={"guided_choice": ["positive", "negative"]},
-)
-print(completion.choices[0].message.content)
+def guided_choice_completion(client: OpenAI, model: str):
+    completion = client.chat.completions.create(
+        model=model,
+        messages=[{
+            "role": "user",
+            "content": "Classify this sentiment: vLLM is wonderful!"
+        }],
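+        # extra_body forwards fields the OpenAI SDK does not define (here vLLM's guided_choice)
+        # straight into the request body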
+        extra_body={"guided_choice": ["positive", "negative"]},
+    )
+    return completion.choices[0].message.content
+
 
 # Guided decoding by Regex
-prompt = ("Generate an email address for Alan Turing, who works in Enigma."
-          "End in .com and new line. Example result:"
-          "alan.turing@enigma.com\n")
-
-completion = client.chat.completions.create(
-    model="Qwen/Qwen2.5-3B-Instruct",
-    messages=[{
-        "role": "user",
-        "content": prompt,
-    }],
-    extra_body={
-        "guided_regex": r"\w+@\w+\.com\n",
-        "stop": ["\n"]
-    },
-)
-print(completion.choices[0].message.content)
+def guided_regex_completion(client: OpenAI, model: str):
+    prompt = ("Generate an email address for Alan Turing, who works in Enigma."
+              "End in .com and new line. Example result:"
+              "alan.turing@enigma.com\n")
+
+    completion = client.chat.completions.create(
+        model=model,
+        messages=[{
+            "role": "user",
+            "content": prompt,
+        }],
+        extra_body={
+            "guided_regex": r"\w+@\w+\.com\n",
+            "stop": ["\n"]
+        },
+    )
+    return completion.choices[0].message.content
 
 
 # Guided decoding by JSON using Pydantic schema
@@ -54,66 +60,100 @@ class CarDescription(BaseModel):
     car_type: CarType
 
 
-json_schema = CarDescription.model_json_schema()
-
-prompt = ("Generate a JSON with the brand, model and car_type of"
-          "the most iconic car from the 90's")
-completion = client.chat.completions.create(
-    model="Qwen/Qwen2.5-3B-Instruct",
-    messages=[{
-        "role": "user",
-        "content": prompt,
-    }],
-    extra_body={"guided_json": json_schema},
-)
-print(completion.choices[0].message.content)
+def guided_json_completion(client: OpenAI, model: str):
+    json_schema = CarDescription.model_json_schema()
 
-# Guided decoding by Grammar
-simplified_sql_grammar = """
-    root ::= select_statement
+    prompt = ("Generate a JSON with the brand, model and car_type of"
+              "the most iconic car from the 90's")
+    completion = client.chat.completions.create(
+        model=model,
+        messages=[{
+            "role": "user",
+            "content": prompt,
+        }],
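+        # guided_json constrains the output to valid JSON matching the CarDescription schema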
+        extra_body={"guided_json": json_schema},
+    )
+    return completion.choices[0].message.content
 
-    select_statement ::= "SELECT " column " from " table " where " condition
 
-    column ::= "col_1 " | "col_2 "
+# Guided decoding by Grammar
+def guided_grammar_completion(client: OpenAI, model: str):
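+    # EBNF-style grammar: generation is restricted to strings this grammar can derive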
+    simplified_sql_grammar = """
+        root ::= select_statement
 
-    table ::= "table_1 " | "table_2 "
+        select_statement ::= "SELECT " column " from " table " where " condition
 
-    condition ::= column "= " number
+        column ::= "col_1 " | "col_2 "
 
-    number ::= "1 " | "2 "
-"""
+        table ::= "table_1 " | "table_2 "
 
-prompt = ("Generate an SQL query to show the 'username' and 'email'"
-          "from the 'users' table.")
-completion = client.chat.completions.create(
-    model="Qwen/Qwen2.5-3B-Instruct",
-    messages=[{
-        "role": "user",
-        "content": prompt,
-    }],
-    extra_body={"guided_grammar": simplified_sql_grammar},
-)
-print(completion.choices[0].message.content)
+        condition ::= column "= " number
 
-# Extra backend options
-prompt = ("Generate an email address for Alan Turing, who works in Enigma."
-          "End in .com and new line. Example result:"
-          "alan.turing@enigma.com\n")
+        number ::= "1 " | "2 "
+    """
 
-try:
-    # The no-fallback option forces vLLM to use xgrammar, so when it fails
-    # you get a 400 with the reason why
+    prompt = ("Generate an SQL query to show the 'username' and 'email'"
+              "from the 'users' table.")
     completion = client.chat.completions.create(
-        model="Qwen/Qwen2.5-3B-Instruct",
+        model=model,
         messages=[{
             "role": "user",
             "content": prompt,
         }],
-        extra_body={
-            "guided_regex": r"\w+@\w+\.com\n",
-            "stop": ["\n"],
-            "guided_decoding_backend": "xgrammar:no-fallback"
-        },
+        extra_body={"guided_grammar": simplified_sql_grammar},
+    )
+    return completion.choices[0].message.content
+
+
+# Extra backend options
+def extra_backend_options_completion(client: OpenAI, model: str):
+    prompt = ("Generate an email address for Alan Turing, who works in Enigma."
+              "End in .com and new line. Example result:"
+              "alan.turing@enigma.com\n")
+
+    try:
+        # The no-fallback option forces vLLM to use xgrammar, so when it fails
+        # you get a 400 with the reason why
+        completion = client.chat.completions.create(
+            model=model,
+            messages=[{
+                "role": "user",
+                "content": prompt,
+            }],
+            extra_body={
+                "guided_regex": r"\w+@\w+\.com\n",
+                "stop": ["\n"],
+                "guided_decoding_backend": "xgrammar:no-fallback"
+            },
+        )
+        return completion.choices[0].message.content
+    except BadRequestError as e:
+        print("This error is expected:", e)
+
+
+def main():
+    client: OpenAI = OpenAI(
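+        # dummy API key: a local vLLM server only enforces a key when started with --api-key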
+        base_url="http://localhost:8000/v1",
+        api_key="-",
     )
-except BadRequestError as e:
-    print("This error is expected:", e)
+
+    model = "Qwen/Qwen2.5-3B-Instruct"
+
+    print("Guided Choice Completion:")
+    print(guided_choice_completion(client, model))
+
+    print("\nGuided Regex Completion:")
+    print(guided_regex_completion(client, model))
+
+    print("\nGuided JSON Completion:")
+    print(guided_json_completion(client, model))
+
+    print("\nGuided Grammar Completion:")
+    print(guided_grammar_completion(client, model))
+
+    print("\nExtra Backend Options Completion:")
+    print(extra_backend_options_completion(client, model))
+
+
+if __name__ == "__main__":
+    main()