@@ -1,22 +1,23 @@
 # SPDX-License-Identifier: Apache-2.0
 
 # The CLI entrypoint to vLLM.
-import argparse
 import os
 import signal
 import sys
-from typing import List, Optional
-
-from openai import OpenAI
-from openai.types.chat import ChatCompletionMessageParam
 
+import vllm.cmd.openai
 import vllm.cmd.serve
 import vllm.version
 from vllm.logger import init_logger
 from vllm.utils import FlexibleArgumentParser
 
 logger = init_logger(__name__)
 
+CMD_MODULES = [
+    vllm.cmd.openai,
+    vllm.cmd.serve,
+]
+
 
 def register_signal_handlers():
 
@@ -27,83 +28,6 @@ def signal_handler(sig, frame):
     signal.signal(signal.SIGTSTP, signal_handler)
 
 
-def interactive_cli(args: argparse.Namespace) -> None:
-    register_signal_handlers()
-
-    base_url = args.url
-    api_key = args.api_key or os.environ.get("OPENAI_API_KEY", "EMPTY")
-    openai_client = OpenAI(api_key=api_key, base_url=base_url)
-
-    if args.model_name:
-        model_name = args.model_name
-    else:
-        available_models = openai_client.models.list()
-        model_name = available_models.data[0].id
-
-    print(f"Using model: {model_name}")
-
-    if args.command == "complete":
-        complete(model_name, openai_client)
-    elif args.command == "chat":
-        chat(args.system_prompt, model_name, openai_client)
-
-
-def complete(model_name: str, client: OpenAI) -> None:
-    print("Please enter prompt to complete:")
-    while True:
-        input_prompt = input("> ")
-
-        completion = client.completions.create(model=model_name,
-                                               prompt=input_prompt)
-        output = completion.choices[0].text
-        print(output)
-
-
-def chat(system_prompt: Optional[str], model_name: str,
-         client: OpenAI) -> None:
-    conversation: List[ChatCompletionMessageParam] = []
-    if system_prompt is not None:
-        conversation.append({"role": "system", "content": system_prompt})
-
-    print("Please enter a message for the chat model:")
-    while True:
-        input_message = input("> ")
-        conversation.append({"role": "user", "content": input_message})
-
-        chat_completion = client.chat.completions.create(model=model_name,
-                                                         messages=conversation)
-
-        response_message = chat_completion.choices[0].message
-        output = response_message.content
-
-        conversation.append(response_message)  # type: ignore
-        print(output)
-
-
-def _add_query_options(
-        parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
-    parser.add_argument(
-        "--url",
-        type=str,
-        default="http://localhost:8000/v1",
-        help="url of the running OpenAI-Compatible RESTful API server")
-    parser.add_argument(
-        "--model-name",
-        type=str,
-        default=None,
-        help=("The model name used in prompt completion, default to "
-              "the first model in list models API call."))
-    parser.add_argument(
-        "--api-key",
-        type=str,
-        default=None,
-        help=(
-            "API key for OpenAI services. If provided, this api key "
-            "will overwrite the api key obtained through environment variables."
-        ))
-    return parser
-
-
 def env_setup():
     # The safest multiprocessing method is `spawn`, as the default `fork` method
     # is not compatible with some accelerators. The default method will be
@@ -134,43 +58,17 @@ def main():
                         action='version',
                         version=vllm.version.__version__)
     subparsers = parser.add_subparsers(required=True, dest="subparser")
-
-    cmd_modules = [
-        vllm.cmd.serve,
-    ]
     cmds = {}
-    for cmd_module in cmd_modules:
-        cmd = cmd_module.cmd_init()
-        cmd.subparser_init(subparsers).set_defaults(dispatch_function=cmd.cmd)
-        cmds[cmd.name] = cmd
-
-    complete_parser = subparsers.add_parser(
-        "complete",
-        help=("Generate text completions based on the given prompt "
-              "via the running API server"),
-        usage="vllm complete [options]")
-    _add_query_options(complete_parser)
-    complete_parser.set_defaults(dispatch_function=interactive_cli,
-                                 command="complete")
-
-    chat_parser = subparsers.add_parser(
-        "chat",
-        help="Generate chat completions via the running API server",
-        usage="vllm chat [options]")
-    _add_query_options(chat_parser)
-    chat_parser.add_argument(
-        "--system-prompt",
-        type=str,
-        default=None,
-        help=("The system prompt to be added to the chat template, "
-              "used for models that support system prompts."))
-    chat_parser.set_defaults(dispatch_function=interactive_cli, command="chat")
-
+    for cmd_module in CMD_MODULES:
+        new_cmds = cmd_module.cmd_init()
+        for cmd in new_cmds:
+            cmd.subparser_init(subparsers).set_defaults(
+                dispatch_function=cmd.cmd)
+            cmds[cmd.name] = cmd
     args = parser.parse_args()
     if args.subparser in cmds:
         cmds[args.subparser].validate(args)
 
-    # One of the sub commands should be executed.
     if hasattr(args, "dispatch_function"):
         args.dispatch_function(args)
     else:
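
The refactor in main() pins down a small contract between the CLI and its subcommands: each module in CMD_MODULES exposes cmd_init() returning a list of command objects, and each command supplies the four members main() touches: name (the cmds dict key), subparser_init(subparsers) (returns the parser whose defaults carry dispatch_function), cmd(args) (the dispatch target), and validate(args). Below is a minimal sketch of a module satisfying that contract; the HelloCommand class and its --who flag are hypothetical, invented for illustration, and only the member names are taken from the diff.

```python
# Hypothetical command module matching the contract main() relies on.
# Everything except the four members main() uses is an assumption.
import argparse


class HelloCommand:
    """Toy `vllm hello` subcommand (illustrative only)."""

    name = "hello"  # main() keys the `cmds` dict on this

    def cmd(self, args: argparse.Namespace) -> None:
        # Stored as `dispatch_function`; invoked as args.dispatch_function(args).
        print(f"Hello, {args.who}!")

    def validate(self, args: argparse.Namespace) -> None:
        # main() calls this before dispatch; raise to reject bad arguments.
        if not args.who:
            raise ValueError("--who must be non-empty")

    def subparser_init(
            self, subparsers: argparse._SubParsersAction
    ) -> argparse.ArgumentParser:
        parser = subparsers.add_parser("hello", help="Toy example command")
        parser.add_argument("--who", default="world")
        return parser


def cmd_init() -> list:
    # Note the list: the diff changes main() from expecting one command per
    # module to iterating over `new_cmds`.
    return [HelloCommand()]
```

Returning a list rather than a single command lets one module register several related subcommands at once, which fits the new vllm.cmd.openai import that appears to absorb the removed complete and chat parsers.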
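
Separately, the env_setup() context lines explain that `spawn` is the safest multiprocessing start method because the default `fork` is incompatible with some accelerators. As a generic illustration of the stdlib mechanism that comment refers to (env_setup()'s body falls outside this diff, so this is a hedged sketch, not vLLM's actual code):

```python
# Generic sketch of opting into `spawn` at an entrypoint. Whether env_setup()
# calls set_start_method() or sets an environment variable is not shown in
# this diff; assume nothing beyond the stdlib API used here.
import multiprocessing


def work(n: int) -> int:
    return n * n


if __name__ == "__main__":
    # Must run once, before any pools or worker processes are created.
    multiprocessing.set_start_method("spawn")
    with multiprocessing.Pool(2) as pool:
        print(pool.map(work, range(4)))
```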