71 changes: 45 additions & 26 deletions vllm/entrypoints/cli/openai.py
@@ -45,6 +45,28 @@ def _interactive_cli(args: argparse.Namespace) -> tuple[str, OpenAI]:
    return model_name, openai_client


+def _print_chat_stream(stream) -> str:

    Contributor (high): To improve type safety and code readability, please add a type hint for the stream parameter. The openai client returns a Stream of ChatCompletionChunk objects. Using a string forward reference for the type hint is a good practice here.

    You'll need to ensure the necessary types are imported within a TYPE_CHECKING block:

        from typing import TYPE_CHECKING

        if TYPE_CHECKING:
            from openai import Stream
            from openai.types.chat import ChatCompletionChunk

    Suggested change:

        -def _print_chat_stream(stream) -> str:
        +def _print_chat_stream(stream: "Stream[ChatCompletionChunk]") -> str:

    Collaborator: +1

+    output = ""
+    for chunk in stream:
+        delta = chunk.choices[0].delta
+        if delta.content:
+            output += delta.content
+            print(delta.content, end="", flush=True)
+    print()
+    return output
+
+
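As an aside, here is a minimal sketch of how _print_chat_stream consumes a stream, driven by stub objects in place of a live client. The _Delta, _Choice, and _Chunk classes below are hypothetical stand-ins for the openai chunk types, for illustration only:

    from dataclasses import dataclass

    # Hypothetical stand-ins for openai's ChatCompletionChunk objects.
    @dataclass
    class _Delta:
        content: str | None

    @dataclass
    class _Choice:
        delta: _Delta

    @dataclass
    class _Chunk:
        choices: list[_Choice]

    # Chunks with content are printed incrementally and accumulated; a chunk
    # whose delta.content is None (e.g. the role-only first chunk) is skipped
    # by the `if delta.content:` guard.
    chunks = [
        _Chunk([_Choice(_Delta(None))]),
        _Chunk([_Choice(_Delta("Hello, "))]),
        _Chunk([_Choice(_Delta("world!"))]),
    ]
    assert _print_chat_stream(chunks) == "Hello, world!"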
+def _print_completion_stream(stream) -> str:

    Contributor (high): For consistency and to improve type safety, please add a type hint for the stream parameter. The openai client returns a Stream of Completion objects for completion requests.

    You'll need to ensure the necessary types are imported within a TYPE_CHECKING block:

        from typing import TYPE_CHECKING

        if TYPE_CHECKING:
            from openai import Stream
            from openai.types import Completion

    Suggested change:

        -def _print_completion_stream(stream) -> str:
        +def _print_completion_stream(stream: "Stream[Completion]") -> str:

+    output = ""
+    for chunk in stream:
+        text = chunk.choices[0].text
+        if text is not None:
+            output += text
+            print(text, end="", flush=True)
+    print()
+    return output
+
+
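If both reviewers' suggestions were applied, the two import blocks could be merged into a single TYPE_CHECKING section at the top of the module. A sketch of the combined result (the diff itself does not contain this):

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        from openai import Stream
        from openai.types import Completion
        from openai.types.chat import ChatCompletionChunk

    def _print_chat_stream(stream: "Stream[ChatCompletionChunk]") -> str: ...
    def _print_completion_stream(stream: "Stream[Completion]") -> str: ...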
def chat(system_prompt: str | None, model_name: str, client: OpenAI) -> None:
    conversation: list[ChatCompletionMessageParam] = []
    if system_prompt is not None:
@@ -58,14 +80,11 @@ def chat(system_prompt: str | None, model_name: str, client: OpenAI) -> None:
            break
        conversation.append({"role": "user", "content": input_message})

-        chat_completion = client.chat.completions.create(model=model_name,
-                                                          messages=conversation)
-
-        response_message = chat_completion.choices[0].message
-        output = response_message.content
-
-        conversation.append(response_message)  # type: ignore
-        print(output)
+        stream = client.chat.completions.create(model=model_name,
+                                                messages=conversation,
+                                                stream=True)
+        output = _print_chat_stream(stream)
+        conversation.append({"role": "assistant", "content": output})
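Note that the new code appends a plain dict rather than the typed response_message object, which is what lets the diff drop the old # type: ignore. After one streamed turn the history has this shape (values illustrative):

    conversation = [
        {"role": "user", "content": "Hi"},
        {"role": "assistant", "content": "Hello! How can I help?"},
    ]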


def _add_query_options(
@@ -108,9 +127,11 @@ def cmd(args: argparse.Namespace) -> None:
        if args.quick:
            conversation.append({"role": "user", "content": args.quick})

-            chat_completion = client.chat.completions.create(
-                model=model_name, messages=conversation)
-            print(chat_completion.choices[0].message.content)
+            stream = client.chat.completions.create(model=model_name,
+                                                    messages=conversation,
+                                                    stream=True)
+            output = _print_chat_stream(stream)
+            conversation.append({"role": "assistant", "content": output})
            return

print("Please enter a message for the chat model:")
@@ -121,14 +142,11 @@ def cmd(args: argparse.Namespace) -> None:
                break
            conversation.append({"role": "user", "content": input_message})

-            chat_completion = client.chat.completions.create(
-                model=model_name, messages=conversation)
-
-            response_message = chat_completion.choices[0].message
-            output = response_message.content
-
-            conversation.append(response_message)  # type: ignore
-            print(output)
+            stream = client.chat.completions.create(model=model_name,
+                                                    messages=conversation,
+                                                    stream=True)
+            output = _print_chat_stream(stream)
+            conversation.append({"role": "assistant", "content": output})

    @staticmethod
    def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
@@ -168,9 +186,10 @@ def cmd(args: argparse.Namespace) -> None:
        model_name, client = _interactive_cli(args)

        if args.quick:
-            completion = client.completions.create(model=model_name,
-                                                   prompt=args.quick)
-            print(completion.choices[0].text)
+            stream = client.completions.create(model=model_name,
+                                               prompt=args.quick,
+                                               stream=True)
+            _print_completion_stream(stream)
    Contributor (high), on lines +189 to +192: The streaming API call can raise exceptions. To make the CLI more robust, please wrap this call in a try...except block to handle potential errors gracefully.

            try:
                stream = client.completions.create(model=model_name,
                                                   prompt=args.quick,
                                                   stream=True)
                _print_completion_stream(stream)
            except Exception as e:
                print(f"\nAn error occurred: {e}")

            return

print("Please enter prompt to complete:")
@@ -179,10 +198,10 @@ def cmd(args: argparse.Namespace) -> None:
                input_prompt = input("> ")
            except EOFError:
                break
-            completion = client.completions.create(model=model_name,
-                                                   prompt=input_prompt)
-            output = completion.choices[0].text
-            print(output)
+            stream = client.completions.create(model=model_name,
+                                               prompt=input_prompt,
+                                               stream=True)
+            _print_completion_stream(stream)
    Contributor (high), on lines +201 to +204: The streaming API call can raise exceptions. To make the CLI more robust, please wrap this call in a try...except block to handle potential errors gracefully.

            try:
                stream = client.completions.create(model=model_name,
                                                   prompt=input_prompt,
                                                   stream=True)
                _print_completion_stream(stream)
            except Exception as e:
                print(f"\nAn error occurred: {e}")


    @staticmethod
    def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: