104 changes: 104 additions & 0 deletions examples/online_serving/minimax_m2.py
@@ -0,0 +1,104 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import json

from openai import OpenAI

"""
https://huggingface.co/MiniMaxAI/MiniMax-M2/blob/main/docs/tool_calling_guide.md

vllm serve MiniMaxAI/MiniMax-M2 \
--tensor-parallel-size 4 \
--tool-call-parser minimax_m2 \
--reasoning-parser minimax_m2_append_think \
--enable-auto-tool-choice \
--port 8000
"""

client = OpenAI(base_url="http://localhost:8000/v1", api_key="dummy")


def get_weather(location: str, unit: str):
return f"The weather for {location} in {unit} is 20"


tool_functions = {"get_weather": get_weather}

tools = [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "City and state, e.g., 'San Francisco, CA'",
},
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
},
"required": ["location", "unit"],
},
},
}
]

messages = [
{
"role": "user",
"content": "What's the weather like in San Francisco? use celsius.",
}
]

response = client.chat.completions.create(
model=client.models.list().data[0].id,
messages=messages,
tools=tools,
tool_choice="auto",
)


print("=== First response ===")
print(response)

# Step 3: Extract and call the function
tool_call = response.choices[0].message.tool_calls[0].function
name = tool_call.name
args = json.loads(tool_call.arguments)
result = tool_functions[name](**args)

print(f"\nFunction called: {name}")
print(f"Arguments: {args}")
print(f"Result: {result}")

# Step 4: Send the result back to the model
messages.append(
{"role": "assistant", "tool_calls": response.choices[0].message.tool_calls}
)
messages.append(
{
"role": "tool",
"tool_call_id": response.choices[0].message.tool_calls[0].id,
"content": result,
}
)

# Step 5: Second call — model sees tool output
second_response = client.chat.completions.create(
model=client.models.list().data[0].id,
messages=messages,
)

print("\n=== Second response ===")
print(second_response.choices[0].message)
69 changes: 49 additions & 20 deletions examples/online_serving/openai_responses_client_with_tools.py
@@ -3,9 +3,20 @@
"""
Set up this example by starting a vLLM OpenAI-compatible server with tool call
options enabled.
Reasoning models can be used through the Responses API as seen here
https://platform.openai.com/docs/api-reference/responses
For example:

NOTE: --tool-call-parser minimax_m2 emits <minimax:tool_call>, which does not
match the <tool_calls> tag the model itself appears to produce.

vllm serve MiniMaxAI/MiniMax-M2 \
--tensor-parallel-size 4 \
--tool-call-parser minimax \
--reasoning-parser minimax_m2_append_think \
--enable-auto-tool-choice \
--chat-template examples/tool_chat_template_minimax_m1.jinja \
--port 8000

vllm serve Qwen/Qwen3-1.7B --reasoning-parser qwen3 \
--structured-outputs-config.backend xgrammar \
--enable-auto-tool-choice --tool-call-parser hermes
@@ -17,51 +28,69 @@
from utils import get_first_model


def get_weather(latitude: float, longitude: float) -> str:
"""
Mock function to simulate getting weather data.
In a real application, this would call an external weather API.
"""
return f"Current temperature at ({latitude}, {longitude}) is 20°C."
# def get_horoscope(sign):
# return f"{sign}: Next Tuesday you will befriend a baby otter."


# tools = [
# {
# "type": "custom",
# "name": "get_horoscope",
# "description": "Get today’s horoscope for an astrological sign.",
# "parameters": {
# "type": "object",
# "properties": {
# "sign": {
# "type": "string",
# "description": "Astrological sign, e.g. Aries, Taurus, Gemini, etc."
# }
# },
# "required": ["sign"]
# }
# }
# ]

tools = [
{
def get_weather(location: str, unit: str):
return f"THe weather for {location} in {unit} is 20"

tools = [{
"type": "function",
"name": "get_weather",
"description": "Get current temperature for provided coordinates in celsius.",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"latitude": {"type": "number"},
"longitude": {"type": "number"},
"location": {"type": "string"},
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
},
"required": ["latitude", "longitude"],
"additionalProperties": False,
},
"strict": True,
"required": ["location", "unit"],
}
}
]

input_messages = [
{"role": "user", "content": "What's the weather like in Paris today?"}
{"role": "user", "content": "What is the weather in Paris in Celsius today?"}
]


def main():
base_url = "http://0.0.0.0:8000/v1"
base_url = "http://localhost:8000/v1"
client = OpenAI(base_url=base_url, api_key="empty")
model = get_first_model(client)
response = client.responses.create(
model=model, input=input_messages, tools=tools, tool_choice="required"
model=model, input=input_messages, tools=tools,
# tool_choice="required"  # this breaks for custom tools
)

print(response)

for out in response.output:
if out.type == "function_call":
print("Function call:", out.name, out.arguments)
tool_call = out
args = json.loads(tool_call.arguments)
result = get_weather(args["latitude"], args["longitude"])
result = get_weather(args["location"], args["unit"])

input_messages.append(tool_call) # append model's function call message
input_messages.append(
23 changes: 21 additions & 2 deletions vllm/entrypoints/openai/serving_responses.py
@@ -119,6 +119,24 @@

logger = init_logger(__name__)

def convert_tool_schema(tool: dict) -> dict:
Review comment (Owner Author): with this serve command,

vllm serve MiniMaxAI/MiniMax-M2 --tensor-parallel-size 4 --tool-call-parser minimax_m2 --reasoning-parser minimax_m2_append_think --enable-auto-tool-choice --port 8000

(gpt_oss_edit) [axia@devvm30969.cln0 /data/users/axia/gitrepos/vllm (af1965f4)]$ curl -X POST "http://localhost:8000/v1/responses"   -H "Content-Type: application/json"   -H "Authorization: Bearer dummy-api-key"   -d '{
        "model": "MiniMaxAI/MiniMax-M2",
        "input": "Whats the weather like in San Francisco? use celsius.",
        "tools": [{
            "type": "function",
            "name": "get_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {"type": "string"},
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}
                },
                "required": ["location", "unit"]
            }
        }]
      }'
{
    "id": "resp_1258a78c6613423e89925bce8d31c40a",
    "created_at": 1762195579,
    "incomplete_details": null,
    "instructions": null,
    "metadata": null,
    "model": "MiniMaxAI/MiniMax-M2",
    "object": "response",
    "output": [
        {
            "id": "rs_5db4414970f74f2092d7062d18240b4c",
            "summary": [],
            "type": "reasoning",
            "content": [
                {
                    "text": "Okay, let's analyze what the user is asking for. They want to know about the weather in San Francisco and specifically requested the temperature in Celsius. This is a straightforward request that requires me to use one of my available tools.\n\nLooking at my available tools, I have \"get_weather\" which is perfect for this situation. The tool requires two parameters:\n1. \"location\" - which should be a string representing the place\n2. \"unit\" - which can be either \"celsius\" or \"fahrenheit\"\n\nThe user has clearly specified both parameters:\n- Location: San Francisco\n- Unit: celsius\n\nI need to format my response using the tool_calls format as specified in my instructions. The format requires me to include the tool name and a JSON object with the arguments.\n\nSo I'll need to call the get_weather tool with location=\"San Francisco\" and unit=\"celsius\". This should retrieve the current weather information for San Francisco with the temperature displayed in Celsius as requested.\n\nI notice the user didn't ask for any specific weather details beyond the general conditions, so I'll just call the basic weather tool. If they want more specific information after seeing the initial results, they can ask in a follow-up message.\n\nLet me prepare the proper tool call with these parameters.\n</think>",
                    "type": "reasoning_text"
                }
            ],
            "encrypted_content": null,
            "status": null
        },
        {
            "arguments": "{\"location\": \"San Francisco\", \"unit\": \"celsius\"}",
            "call_id": "call_b3b27bb9b8f249cda223aba574f583ac",
            "name": "get_weather",
            "type": "function_call",
            "id": "fc_68ca0384b1a54c36bc06cf8f834beeda",
            "status": "completed"
        }
    ],
    "parallel_tool_calls": true,
    "temperature": 1.0,
    "tool_choice": "auto",
    "tools": [
        {
            "name": "get_weather",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string"
                    },
                    "unit": {
                        "type": "string",
                        "enum": [
                            "celsius",
                            "fahrenheit"
                        ]
                    }
                },
                "required": [
                    "location",
                    "unit"
                ]
            },
            "strict": null,
            "type": "function",
            "description": "Get the current weather in a given location"
        }
    ],
    "top_p": 0.95,
    "background": false,
    "max_output_tokens": 196394,
    "max_tool_calls": null,
    "previous_response_id": null,
    "prompt": null,
    "reasoning": null,
    "service_tier": "auto",
    "status": "completed",
    "text": null,
    "top_logprobs": null,
    "truncation": "disabled",
    "usage": {
        "input_tokens": 214,
        "input_tokens_details": {
            "cached_tokens": 0,
            "input_tokens_per_turn": [],
            "cached_tokens_per_turn": []
        },
        "output_tokens": 296,
        "output_tokens_details": {
            "reasoning_tokens": 0,
            "tool_output_tokens": 0,
            "output_tokens_per_turn": [],
            "tool_output_tokens_per_turn": []
        },
        "total_tokens": 510
    },
    "user": null,
    "input_messages": null,
    "output_messages": null
}

"""
Convert a flat tool schema:
{"type": "function", "name": "...", "description": "...", "parameters": {...}}
into:
{"type": "function", "function": {...}}
"""
if tool.get("type") != "function":
raise ValueError("Expected tool['type'] == 'function'")

# Extract everything except 'type' and wrap inside 'function'
function_body = {k: v for k, v in tool.items() if k != "type"}

return {
"type": "function",
"function": function_body,
}
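
A quick worked example of the conversion (hypothetical input values, for illustration only):

# Flat, Responses-style schema in; nested Chat-Completions-style schema out.
flat = {
    "type": "function",
    "name": "get_weather",
    "description": "Get the current weather in a given location",
    "parameters": {"type": "object", "properties": {}},
}
nested = convert_tool_schema(flat)
# nested == {
#     "type": "function",
#     "function": {
#         "name": "get_weather",
#         "description": "Get the current weather in a given location",
#         "parameters": {"type": "object", "properties": {}},
#     },
# }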


def extract_tool_types(tools: list[Tool]) -> set[str]:
"""
@@ -520,12 +538,13 @@
prev_response: ResponsesResponse | None,
tokenizer: AnyTokenizer,
):
if request.tools is None or (
request.tool_choice == "none" and self.exclude_tools_when_tool_choice_none
):
tool_dicts = None
else:
tool_dicts = [tool.model_dump() for tool in request.tools]
tool_dicts = [
    convert_tool_schema(tool.model_dump()) for tool in request.tools
]
# Construct the input messages.
messages = self._construct_input_messages(request, prev_response)
_, request_prompts, engine_prompts = await self._preprocess_chat(
@@ -853,7 +872,7 @@
content=[output_text],
role="assistant",
status="completed",
type="message",
type="message", #this could be a function call output
)
outputs = []

2 changes: 1 addition & 1 deletion vllm/entrypoints/openai/tool_parsers/utils.py
@@ -224,6 +224,6 @@ def get_json_schema_from_tools(
return tool_map[tool_name].function.parameters
# tool_choice: "required"
if tool_choice == "required":
return _get_json_schema_from_tools(tools)
return _get_json_schema_from_tools(tools)  # this breaks
# tool_choice: "auto"
return None
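
For context, when tool_choice == "required" the helper builds a schema covering all tools so decoding is constrained to valid calls. A rough sketch of one plausible shape (an assumption for illustration, not vLLM's actual _get_json_schema_from_tools output):

def _sketch_schema_from_tools(tools) -> dict:
    # Assumed shape: a non-empty array of tool calls, each matching one
    # tool's name plus its parameter schema. The real helper may differ.
    return {
        "type": "array",
        "minItems": 1,
        "items": {
            "anyOf": [
                {
                    "type": "object",
                    "properties": {
                        "name": {"const": tool.function.name},
                        "parameters": tool.function.parameters,
                    },
                    "required": ["name", "parameters"],
                }
                for tool in tools
            ]
        },
    }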
13 changes: 12 additions & 1 deletion vllm/reasoning/minimax_m2_reasoning_parser.py
@@ -15,6 +15,8 @@

import re

logger = init_logger(__name__)

@ReasoningParserManager.register_module("minimax_m2")
class MiniMaxM2ReasoningParser(BaseThinkingReasoningParser):
@@ -66,4 +68,13 @@
def extract_reasoning_content(
self, model_output: str, request: ChatCompletionRequest | ResponsesRequest
) -> tuple[str | None, str | None]:
return None, "<think>" + model_output

# Split on the first closing </think> tag; the tag stays with the
# reasoning text, everything after it becomes the content.
match = re.search(r"</think>\s*", model_output)
if not match:
    return model_output, ""

end_idx = match.end()
before = model_output[:end_idx]
after = model_output[end_idx:]
return before.strip(), after.strip()
Review comment (Owner Author): this makes the result:

"output": [
        {
            "id": "rs_3f8177bfe6964ad896e3b092f5831839",
            "summary": [],
            "type": "reasoning",
            "content": [
                {
                    "text": "<think>\nLet me analyze this request. The user is asking for today's horoscope for Leo, which is one of the astrological signs. This is a straightforward request that requires me to retrieve astrological information.\n\nLooking at the tools available to me, I see I have access to a \"get_horoscope\" tool that can fetch today's horoscope for a specific astrological sign. This is exactly what I need for this request.\n\nThe tool requires a parameter called \"sign\" which should be the astrological sign. In this case, the user has specifically asked for Leo, so I'll need to pass \"Leo\" as the argument.\n\nThe format for calling this tool is to use the <tool_calls></tool_calls> XML tags and provide a JSON object with the tool name and arguments. For this case, I need to:\n1. Set the tool name as \"get_horoscope\"\n2. Set the arguments as a JSON object with the \"sign\" parameter set to \"Leo\"\n\nThis should retrieve today's horoscope for Leo, which is exactly what the user requested. The tool will handle the actual fetching of the horoscope information, and I'll receive the results to share with the user.\n\nI don't need to do any additional processing or formatting at this point - I just need to make the tool call with the correct parameters. Once I get the results back from the tool, I can then present the horoscope information to the user in a clear and helpful way.\n\nSo I'll proceed with calling the get_horoscope tool with the sign parameter set to \"Leo\".\n</think>",
                    "type": "reasoning_text"
                }
            ],
            "encrypted_content": null,
            "status": null
        },
        {
            "id": "msg_78e7fc9753fd45edab0058b6f6db62a0",
            "content": [
                {
                    "annotations": [],
                    "text": "<tool_calls>\n{\"name\": \"get_horoscope\", \"arguments\": {\"sign\": \"Leo\"}}\n</tool_calls>",
                    "type": "output_text",
                    "logprobs": null
                }
            ],
            "role": "assistant",
            "status": "completed",
            "type": "message"
        }
    ],

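For reference, a standalone sketch (a hypothetical helper mirroring the parser change above, not part of the diff) showing how the split behaves on a sample completion:

import re

def split_reasoning(model_output: str) -> tuple[str, str]:
    # Mirrors the parser logic: split on the first closing </think> tag,
    # keeping the tag with the reasoning text.
    match = re.search(r"</think>\s*", model_output)
    if not match:
        return model_output, ""
    end_idx = match.end()
    return model_output[:end_idx].strip(), model_output[end_idx:].strip()

reasoning, content = split_reasoning(
    "<think>\nThe user wants today's horoscope for Leo...\n</think>\n"
    '<tool_calls>\n{"name": "get_horoscope", "arguments": {"sign": "Leo"}}\n</tool_calls>'
)
# reasoning -> the <think>...</think> block (closing tag kept)
# content   -> the <tool_calls> block, ready for the tool-call parser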