forked from vllm-project/vllm
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Feature] OpenAI-Compatible Tools API + Streaming for Hermes & Mistral models (vllm-project#5649)
Co-authored-by: constellate <constellate@1-ai-appserver-staging.codereach.com> Co-authored-by: Kyle Mistele <kyle@constellate.ai> Signed-off-by: Alvant <alvasian@yandex.ru>
- Loading branch information
Showing
26 changed files
with
2,588 additions
and
83 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,162 @@ | ||
""" | ||
Set up this example by starting a vLLM OpenAI-compatible server with tool call
options enabled. For example:

IMPORTANT: for mistral, you must use one of the provided mistral tool call
templates, or your own - the model default doesn't work for tool calls with vLLM.
See the vLLM docs on OpenAI server & tool calling for more details.

vllm serve --model mistralai/Mistral-7B-Instruct-v0.3 \
            --chat-template examples/tool_chat_template_mistral.jinja \
            --enable-auto-tool-choice --tool-call-parser mistral

OR

vllm serve --model NousResearch/Hermes-2-Pro-Llama-3-8B \
            --chat-template examples/tool_chat_template_hermes.jinja \
            --enable-auto-tool-choice --tool-call-parser hermes
""" | ||
import json | ||
|
||
from openai import OpenAI | ||
|
||
# Modify OpenAI's API key and API base to use vLLM's API server.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1"

client = OpenAI(
    # defaults to os.environ.get("OPENAI_API_KEY")
    api_key=openai_api_key,
    base_url=openai_api_base,
)

# Use the first model the server lists for every request below.
models = client.models.list()
model = models.data[0].id
|
||
# Tool schema advertised to the model: one function taking a city, a state
# abbreviation and a temperature unit, all of them required.
tools = [{
    "type": "function",
    "function": {
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {
                    "type":
                    "string",
                    "description":
                    "The city to find the weather for, e.g. 'San Francisco'"
                },
                "state": {
                    "type":
                    "string",
                    "description":
                    "the two-letter abbreviation for the state that the city is"
                    " in, e.g. 'CA' which would mean 'California'"
                },
                "unit": {
                    "type": "string",
                    "description": "The unit to fetch the temperature in",
                    "enum": ["celsius", "fahrenheit"]
                }
            },
            "required": ["city", "state", "unit"]
        }
    }
}]
|
||
# Conversation history sent with every request; the final user turn is the
# one intended to trigger the weather tool.
messages = [{
    "role": "user",
    "content": "Hi! How are you doing today?"
}, {
    "role": "assistant",
    "content": "I'm doing well! How can I help you?"
}, {
    "role":
    "user",
    "content":
    "Can you tell me what the temperature will be in Dallas, in fahrenheit?"
}]
|
||
# Non-streaming request: the whole response, including any tool calls,
# comes back as a single object.
chat_completion = client.chat.completions.create(messages=messages,
                                                 model=model,
                                                 tools=tools)

print("Chat completion results:")
print(chat_completion)
print("\n\n")
|
||
# Same request again, streamed this time.
tool_calls_stream = client.chat.completions.create(messages=messages,
                                                   model=model,
                                                   tools=tools,
                                                   stream=True)

# Keep every chunk so the tool call can be reassembled after the stream ends,
# and echo each delta as it arrives.
chunks = []
for chunk in tool_calls_stream:
    chunks.append(chunk)
    if not chunk.choices:
        # A chunk may carry no choices (for example a usage-only chunk);
        # there is no delta to show for it.
        continue
    delta = chunk.choices[0].delta
    if delta.tool_calls:
        print(delta.tool_calls[0])
    else:
        print(delta)
|
||
# Reassemble the streamed tool calls. Each call's JSON argument string arrives
# in fragments; `arguments` holds one accumulated string per tool call and
# `tool_call_idx` is the stream index of the call currently being assembled.
arguments = []
tool_call_idx = -1
for chunk in chunks:
    # Skip chunks with no choices or no tool-call delta (plain content,
    # role-only or finish chunks).
    if not chunk.choices or not chunk.choices[0].delta.tool_calls:
        continue
    tool_call = chunk.choices[0].delta.tool_calls[0]

    if tool_call.index != tool_call_idx:
        # A new index means the previous call is complete: report it, then
        # open a fresh buffer for the new call.
        if tool_call_idx >= 0:
            print(f"streamed tool call arguments: {arguments[-1]}")
        tool_call_idx = tool_call.index
        arguments.append("")
    if tool_call.id:
        print(f"streamed tool call id: {tool_call.id} ")

    if tool_call.function:
        if tool_call.function.name:
            print(f"streamed tool call name: {tool_call.function.name}")

        if tool_call.function.arguments:
            # The active call is always the most recently opened buffer, so
            # this does not rely on server indices starting at 0.
            arguments[-1] += tool_call.function.arguments

# The last call is never followed by an index change, so report it here.
if arguments:
    print(f"streamed tool call arguments: {arguments[-1]}")

print("\n\n")
|
||
# Record the assistant turn that requested the tool calls (taken from the
# non-streaming response) before the tool results are appended.
messages.append({
    "role": "assistant",
    "tool_calls": chat_completion.choices[0].message.tool_calls
})
|
||
|
||
# Now, simulate a tool call
def get_current_weather(city: str, state: str, unit: str) -> str:
    """Return a canned weather report, standing in for a real lookup.

    Args:
        city: Requested city; accepted but ignored.
        state: Requested state abbreviation; accepted but ignored.
        unit: Requested temperature unit; accepted but ignored.

    Returns:
        The same fixed Dallas forecast string on every call.
    """
    return ("The weather in Dallas, Texas is 85 degrees fahrenheit. It is "
            "partly cloudy, with highs in the 90's.")
|
||
|
||
# Maps the function name the model asks for to the local implementation.
available_tools = {"get_current_weather": get_current_weather}

# `tool_calls` is None when the model answered in plain text instead of
# calling a tool; fall back to an empty list so the loop is simply skipped.
completion_tool_calls = chat_completion.choices[0].message.tool_calls or []
for call in completion_tool_calls:
    tool_to_call = available_tools[call.function.name]
    # The arguments arrive as a JSON-encoded string.
    args = json.loads(call.function.arguments)
    result = tool_to_call(**args)
    print(result)

    # Feed the result back, linked to the originating call by its id.
    messages.append({
        "role": "tool",
        "content": result,
        "tool_call_id": call.id,
        "name": call.function.name
    })

# Second round trip: the model now sees the tool output in the history.
chat_completion_2 = client.chat.completions.create(messages=messages,
                                                   model=model,
                                                   tools=tools,
                                                   stream=False)
print("\n\n")
print(chat_completion_2)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
{#- Render a JSON-schema type spec as a Python type-hint style string. #}
{%- macro json_to_python_type(json_spec) %}
    {%- set basic_type_map = {
        "string": "str",
        "number": "float",
        "integer": "int",
        "boolean": "bool"
    } %}

    {%- if basic_type_map[json_spec.type] is defined %}
        {{- basic_type_map[json_spec.type] }}
    {%- elif json_spec.type == "array" %}
        {#- The element schema lives under the "items" key. Use a membership test plus subscript: the items filter would yield key/value pairs, and attribute-style access can resolve to the dict method of the same name. #}
        {%- if "items" in json_spec %}
            {{- "list[" + json_to_python_type(json_spec["items"]) + "]" }}
        {%- else %}
            {{- "list[Any]" }}
        {%- endif %}
    {%- elif json_spec.type == "object" %}
        {%- if json_spec.additionalProperties is defined %}
            {{- "dict[str, " + json_to_python_type(json_spec.additionalProperties) + ']' }}
        {%- else %}
            {{- "dict" }}
        {%- endif %}
    {#- Only a real list of type names is a union. A plain string is iterable too and would otherwise recurse once per character without end. #}
    {%- elif json_spec.type is defined and json_spec.type is not string and json_spec.type is iterable %}
        {{- "Union[" }}
        {%- for t in json_spec.type %}
            {{- json_to_python_type({"type": t}) }}
            {%- if not loop.last %}
                {{- "," }}
            {%- endif %}
        {%- endfor %}
        {{- "]" }}
    {%- else %}
        {{- "Any" }}
    {%- endif %}
{%- endmacro %}
|
||
|
||
{#- System prompt: describe every available tool, then the expected tool-call output format. #}
{{- bos_token }}
{{- "<|im_start|>system\nYou are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: <tools> " }}
{%- if tools is iterable and tools | length > 0 %}
    {%- for tool in tools %}
        {#- Accept both the OpenAI wrapper form and a bare function spec. #}
        {%- if tool.function is defined %}
            {%- set tool = tool.function %}
        {%- endif %}
        {{- '{"type": "function", "function": ' }}
        {{- '{"name": "' + tool.name + '", ' }}
        {#- The description starts with a Python-style signature built from the parameter schema. #}
        {{- '"description": "' + tool.name + '(' }}
        {%- for param_name, param_fields in tool.parameters.properties|items %}
            {{- param_name + ": " + json_to_python_type(param_fields) }}
            {%- if not loop.last %}
                {{- ", " }}
            {%- endif %}
        {%- endfor %}
        {{- ")" }}
        {%- if tool.return is defined %}
            {{- " -> " + json_to_python_type(tool.return) }}
        {%- endif %}
        {{- " - " + tool.description + "\n\n" }}
        {%- for param_name, param_fields in tool.parameters.properties|items %}
            {%- if loop.first %}
                {{- " Args:\n" }}
            {%- endif %}
            {{- " " + param_name + "(" + json_to_python_type(param_fields) + "): " + param_fields.description|trim }}
        {%- endfor %}
        {%- if tool.return is defined and tool.return.description is defined %}
            {{- "\n Returns:\n " + tool.return.description }}
        {%- endif %}
        {{- '"' }}
        {{- ', "parameters": ' }}
        {%- if tool.parameters.properties | length == 0 %}
            {{- "{}" }}
        {%- else %}
            {{- tool.parameters|tojson }}
        {%- endif %}
        {#- NOTE(review): two braces are opened for each tool above but only one is closed here. Confirm which format the model expects before changing it. #}
        {{- "}" }}
        {%- if not loop.last %}
            {{- "\n" }}
        {%- endif %}
    {%- endfor %}
{%- endif %}
{{- " </tools>" }}
{{- 'Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}}\n' }}
{{- "For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:\n" }}
{{- "<tool_call>\n" }}
{{- '{"name": <function-name>, "arguments": <args-dict>}\n' }}
{{- '</tool_call><|im_end|>' }}
{#- Conversation history. #}
{%- for message in messages %}
    {%- if message.role == "user" or message.role == "system" or (message.role == "assistant" and message.tool_calls is not defined) %}
        {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
    {%- elif message.role == "assistant" and message.tool_calls is defined %}
        {{- '<|im_start|>' + message.role }}
        {%- for tool_call in message.tool_calls %}
            {{- '\n<tool_call>\n' }}
            {%- if tool_call.function is defined %}
                {%- set tool_call = tool_call.function %}
            {%- endif %}
            {#- Emit one well-formed JSON object: the brace closes after the arguments, and the separating comma appears only when arguments exist. #}
            {{- '{' }}
            {{- '"name": "' }}
            {{- tool_call.name }}
            {{- '"' }}
            {%- if tool_call.arguments is defined %}
                {{- ', ' }}
                {{- '"arguments": ' }}
                {{- tool_call.arguments|tojson }}
            {%- endif %}
            {{- '}' }}
            {{- '\n</tool_call>' }}
        {%- endfor %}
        {{- '<|im_end|>\n' }}
    {%- elif message.role == "tool" %}
        {#- Consecutive tool results share a single tool turn: open it on the first result and close it after the last. #}
        {%- if loop.previtem and loop.previtem.role != "tool" %}
            {{- '<|im_start|>tool\n' }}
        {%- endif %}
        {{- '<tool_response>\n' }}
        {{- message.content }}
        {%- if not loop.last %}
            {{- '\n</tool_response>\n' }}
        {%- else %}
            {{- '\n</tool_response>' }}
        {%- endif %}
        {%- if not loop.last and loop.nextitem.role != "tool" %}
            {{- '<|im_end|>' }}
        {%- elif loop.last %}
            {{- '<|im_end|>' }}
        {%- endif %}
    {%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
    {{- '<|im_start|>assistant\n' }}
{%- endif %}
Oops, something went wrong.