104 changes: 104 additions & 0 deletions examples/online_serving/minimax_m2.py
@@ -0,0 +1,104 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import json

from openai import OpenAI

"""
https://huggingface.co/MiniMaxAI/MiniMax-M2/blob/main/docs/tool_calling_guide.md

vllm serve MiniMaxAI/MiniMax-M2 \
--tensor-parallel-size 4 \
--tool-call-parser minimax_m2 \
--reasoning-parser minimax_m2_append_think \
--enable-auto-tool-choice \
--port 8000
"""

client = OpenAI(base_url="http://localhost:8000/v1", api_key="dummy")


def get_weather(location: str, unit: str):
return f"The weather for {location} in {unit} is 20"


tool_functions = {"get_weather": get_weather}

tools = [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "City and state, e.g., 'San Francisco, CA'",
},
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
},
"required": ["location", "unit"],
},
},
}
]

messages = [
{
"role": "user",
"content": "What's the weather like in San Francisco? use celsius.",
}
]

response = client.chat.completions.create(
model=client.models.list().data[0].id,
messages=messages,
tools=tools,
tool_choice="auto",
)


print("=== First response ===")
print(response)

# Step 3: Extract and call the function
tool_call = response.choices[0].message.tool_calls[0].function
name = tool_call.name
args = json.loads(tool_call.arguments)
result = tool_functions[name](**args)

print(f"\nFunction called: {name}")
print(f"Arguments: {args}")
print(f"Result: {result}")

# Step 4: Send the result back to the model
messages.append(
{"role": "assistant", "tool_calls": response.choices[0].message.tool_calls}
)
messages.append(
{
"role": "tool",
"tool_call_id": response.choices[0].message.tool_calls[0].id,
"content": result,
}
)

# Step 5: Second call — model sees tool output
second_response = client.chat.completions.create(
model=client.models.list().data[0].id,
messages=messages,
)

print("\n=== Second response ===")
print(second_response.choices[0].message)
69 changes: 49 additions & 20 deletions examples/online_serving/openai_responses_client_with_tools.py
@@ -3,9 +3,20 @@
"""
Set up this example by starting a vLLM OpenAI-compatible server with tool call
options enabled.
Reasoning models can be used through the Responses API as seen here
https://platform.openai.com/docs/api-reference/responses
For example:

NOTE: --tool-call-parser minimax_m2 emits <minimax:tool_call>, which does not
match the <tool_calls> tag the model itself appears to produce.

vllm serve MiniMaxAI/MiniMax-M2 \
--tensor-parallel-size 4 \
--tool-call-parser minimax \
--reasoning-parser minimax_m2_append_think \
--enable-auto-tool-choice \
--chat-template examples/tool_chat_template_minimax_m1.jinja \
--port 8000

vllm serve Qwen/Qwen3-1.7B --reasoning-parser qwen3 \
--structured-outputs-config.backend xgrammar \
--enable-auto-tool-choice --tool-call-parser hermes
@@ -17,51 +28,69 @@
from utils import get_first_model


def get_weather(latitude: float, longitude: float) -> str:
"""
Mock function to simulate getting weather data.
In a real application, this would call an external weather API.
"""
return f"Current temperature at ({latitude}, {longitude}) is 20°C."
# def get_horoscope(sign):
# return f"{sign}: Next Tuesday you will befriend a baby otter."


# tools = [
# {
# "type": "custom",
# "name": "get_horoscope",
# "description": "Get today’s horoscope for an astrological sign.",
# "parameters": {
# "type": "object",
# "properties": {
# "sign": {
# "type": "string",
# "description": "Astrological sign, e.g. Aries, Taurus, Gemini, etc."
# }
# },
# "required": ["sign"]
# }
# }
# ]

tools = [
{
def get_weather(location: str, unit: str):
return f"THe weather for {location} in {unit} is 20"

tools = [{
"type": "function",
"name": "get_weather",
"description": "Get current temperature for provided coordinates in celsius.",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"latitude": {"type": "number"},
"longitude": {"type": "number"},
"location": {"type": "string"},
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
},
"required": ["latitude", "longitude"],
"additionalProperties": False,
},
"strict": True,
"required": ["location", "unit"],
}
}
]

input_messages = [
{"role": "user", "content": "What's the weather like in Paris today?"}
{"role": "user", "content": "What is the weather in Paris in Celsius today?"}
]


def main():
base_url = "http://0.0.0.0:8000/v1"
base_url = "http://localhost:8000/v1"
client = OpenAI(base_url=base_url, api_key="empty")
model = get_first_model(client)
response = client.responses.create(
model=model, input=input_messages, tools=tools, tool_choice="required"
model=model, input=input_messages, tools=tools,
# tool_choice="required"  # this breaks for custom tools
)

print(response)

for out in response.output:
if out.type == "function_call":
print("Function call:", out.name, out.arguments)
tool_call = out
args = json.loads(tool_call.arguments)
result = get_weather(args["latitude"], args["longitude"])
result = get_weather(args["location"], args["unit"])

input_messages.append(tool_call) # append model's function call message
input_messages.append(
23 changes: 21 additions & 2 deletions vllm/entrypoints/openai/serving_responses.py
@@ -119,6 +119,24 @@

logger = init_logger(__name__)

def convert_tool_schema(tool: dict) -> dict:
Review comment (Owner Author): with this serve command,

vllm serve MiniMaxAI/MiniMax-M2 --tensor-parallel-size 4 --tool-call-parser minimax_m2 --reasoning-parser minimax_m2_append_think --enable-auto-tool-choice --port 8000

(gpt_oss_edit) [axia@devvm30969.cln0 /data/users/axia/gitrepos/vllm (af1965f4)]$ curl -X POST "http://localhost:8000/v1/responses"   -H "Content-Type: application/json"   -H "Authorization: Bearer dummy-api-key"   -d '{
        "model": "MiniMaxAI/MiniMax-M2",
        "input": "Whats the weather like in San Francisco? use celsius.",
        "tools": [{
            "type": "function",
            "name": "get_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {"type": "string"},
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}
                },
                "required": ["location", "unit"]
            }
        }]
      }'
{
    "id": "resp_1258a78c6613423e89925bce8d31c40a",
    "created_at": 1762195579,
    "incomplete_details": null,
    "instructions": null,
    "metadata": null,
    "model": "MiniMaxAI/MiniMax-M2",
    "object": "response",
    "output": [
        {
            "id": "rs_5db4414970f74f2092d7062d18240b4c",
            "summary": [],
            "type": "reasoning",
            "content": [
                {
                    "text": "Okay, let's analyze what the user is asking for. They want to know about the weather in San Francisco and specifically requested the temperature in Celsius. This is a straightforward request that requires me to use one of my available tools.\n\nLooking at my available tools, I have \"get_weather\" which is perfect for this situation. The tool requires two parameters:\n1. \"location\" - which should be a string representing the place\n2. \"unit\" - which can be either \"celsius\" or \"fahrenheit\"\n\nThe user has clearly specified both parameters:\n- Location: San Francisco\n- Unit: celsius\n\nI need to format my response using the tool_calls format as specified in my instructions. The format requires me to include the tool name and a JSON object with the arguments.\n\nSo I'll need to call the get_weather tool with location=\"San Francisco\" and unit=\"celsius\". This should retrieve the current weather information for San Francisco with the temperature displayed in Celsius as requested.\n\nI notice the user didn't ask for any specific weather details beyond the general conditions, so I'll just call the basic weather tool. If they want more specific information after seeing the initial results, they can ask in a follow-up message.\n\nLet me prepare the proper tool call with these parameters.\n</think>",
                    "type": "reasoning_text"
                }
            ],
            "encrypted_content": null,
            "status": null
        },
        {
            "arguments": "{\"location\": \"San Francisco\", \"unit\": \"celsius\"}",
            "call_id": "call_b3b27bb9b8f249cda223aba574f583ac",
            "name": "get_weather",
            "type": "function_call",
            "id": "fc_68ca0384b1a54c36bc06cf8f834beeda",
            "status": "completed"
        }
    ],
    "parallel_tool_calls": true,
    "temperature": 1.0,
    "tool_choice": "auto",
    "tools": [
        {
            "name": "get_weather",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string"
                    },
                    "unit": {
                        "type": "string",
                        "enum": [
                            "celsius",
                            "fahrenheit"
                        ]
                    }
                },
                "required": [
                    "location",
                    "unit"
                ]
            },
            "strict": null,
            "type": "function",
            "description": "Get the current weather in a given location"
        }
    ],
    "top_p": 0.95,
    "background": false,
    "max_output_tokens": 196394,
    "max_tool_calls": null,
    "previous_response_id": null,
    "prompt": null,
    "reasoning": null,
    "service_tier": "auto",
    "status": "completed",
    "text": null,
    "top_logprobs": null,
    "truncation": "disabled",
    "usage": {
        "input_tokens": 214,
        "input_tokens_details": {
            "cached_tokens": 0,
            "input_tokens_per_turn": [],
            "cached_tokens_per_turn": []
        },
        "output_tokens": 296,
        "output_tokens_details": {
            "reasoning_tokens": 0,
            "tool_output_tokens": 0,
            "output_tokens_per_turn": [],
            "tool_output_tokens_per_turn": []
        },
        "total_tokens": 510
    },
    "user": null,
    "input_messages": null,
    "output_messages": null
}

"""
Convert a flat tool schema:
{"type": "function", "name": "...", "description": "...", "parameters": {...}}
into:
{"type": "function", "function": {...}}
"""
if tool.get("type") != "function":
raise ValueError("Expected tool['type'] == 'function'")

# Extract everything except 'type' and wrap inside 'function'
function_body = {k: v for k, v in tool.items() if k != "type"}

return {
"type": "function",
"function": function_body,
}
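
A quick worked example of the conversion (hypothetical input values, for illustration only):

# Flat, Responses-style schema in; nested Chat-Completions-style schema out.
flat = {
    "type": "function",
    "name": "get_weather",
    "description": "Get the current weather in a given location",
    "parameters": {"type": "object", "properties": {}},
}
nested = convert_tool_schema(flat)
# nested == {
#     "type": "function",
#     "function": {
#         "name": "get_weather",
#         "description": "Get the current weather in a given location",
#         "parameters": {"type": "object", "properties": {}},
#     },
# }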


def extract_tool_types(tools: list[Tool]) -> set[str]:
"""
@@ -520,12 +538,13 @@
prev_response: ResponsesResponse | None,
tokenizer: AnyTokenizer,
):
if request.tools is None or (
request.tool_choice == "none" and self.exclude_tools_when_tool_choice_none
):
tool_dicts = None
else:
tool_dicts = [tool.model_dump() for tool in request.tools]
tool_dicts = [
    convert_tool_schema(tool.model_dump()) for tool in request.tools
]
# Construct the input messages.
messages = self._construct_input_messages(request, prev_response)
_, request_prompts, engine_prompts = await self._preprocess_chat(
@@ -853,7 +872,7 @@
content=[output_text],
role="assistant",
status="completed",
type="message",
type="message", #this could be a function call output
)
outputs = []

2 changes: 1 addition & 1 deletion vllm/entrypoints/openai/tool_parsers/utils.py
@@ -224,6 +224,6 @@ def get_json_schema_from_tools(
return tool_map[tool_name].function.parameters
# tool_choice: "required"
if tool_choice == "required":
return _get_json_schema_from_tools(tools)
return _get_json_schema_from_tools(tools)  # this breaks
# tool_choice: "auto"
return None
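
For context, when tool_choice == "required" the helper builds a schema covering all tools so decoding is constrained to valid calls. A rough sketch of one plausible shape (an assumption for illustration, not vLLM's actual _get_json_schema_from_tools output):

def _sketch_schema_from_tools(tools) -> dict:
    # Assumed shape: a non-empty array of tool calls, each matching one
    # tool's name plus its parameter schema. The real helper may differ.
    return {
        "type": "array",
        "minItems": 1,
        "items": {
            "anyOf": [
                {
                    "type": "object",
                    "properties": {
                        "name": {"const": tool.function.name},
                        "parameters": tool.function.parameters,
                    },
                    "required": ["name", "parameters"],
                }
                for tool in tools
            ]
        },
    }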
13 changes: 12 additions & 1 deletion vllm/reasoning/minimax_m2_reasoning_parser.py
@@ -15,6 +15,8 @@

import re

logger = init_logger(__name__)

@ReasoningParserManager.register_module("minimax_m2")
class MiniMaxM2ReasoningParser(BaseThinkingReasoningParser):
@@ -66,4 +68,13 @@
def extract_reasoning_content(
self, model_output: str, request: ChatCompletionRequest | ResponsesRequest
) -> tuple[str | None, str | None]:
return None, "<think>" + model_output

# Split on the first closing </think> tag; the tag stays with the
# reasoning text, everything after it becomes the content.
match = re.search(r"</think>\s*", model_output)
if not match:
    return model_output, ""

end_idx = match.end()
before = model_output[:end_idx]
after = model_output[end_idx:]
return before.strip(), after.strip()
Review comment (Owner Author): this makes the result:

"output": [
        {
            "id": "rs_3f8177bfe6964ad896e3b092f5831839",
            "summary": [],
            "type": "reasoning",
            "content": [
                {
                    "text": "<think>\nLet me analyze this request. The user is asking for today's horoscope for Leo, which is one of the astrological signs. This is a straightforward request that requires me to retrieve astrological information.\n\nLooking at the tools available to me, I see I have access to a \"get_horoscope\" tool that can fetch today's horoscope for a specific astrological sign. This is exactly what I need for this request.\n\nThe tool requires a parameter called \"sign\" which should be the astrological sign. In this case, the user has specifically asked for Leo, so I'll need to pass \"Leo\" as the argument.\n\nThe format for calling this tool is to use the <tool_calls></tool_calls> XML tags and provide a JSON object with the tool name and arguments. For this case, I need to:\n1. Set the tool name as \"get_horoscope\"\n2. Set the arguments as a JSON object with the \"sign\" parameter set to \"Leo\"\n\nThis should retrieve today's horoscope for Leo, which is exactly what the user requested. The tool will handle the actual fetching of the horoscope information, and I'll receive the results to share with the user.\n\nI don't need to do any additional processing or formatting at this point - I just need to make the tool call with the correct parameters. Once I get the results back from the tool, I can then present the horoscope information to the user in a clear and helpful way.\n\nSo I'll proceed with calling the get_horoscope tool with the sign parameter set to \"Leo\".\n</think>",
                    "type": "reasoning_text"
                }
            ],
            "encrypted_content": null,
            "status": null
        },
        {
            "id": "msg_78e7fc9753fd45edab0058b6f6db62a0",
            "content": [
                {
                    "annotations": [],
                    "text": "<tool_calls>\n{\"name\": \"get_horoscope\", \"arguments\": {\"sign\": \"Leo\"}}\n</tool_calls>",
                    "type": "output_text",
                    "logprobs": null
                }
            ],
            "role": "assistant",
            "status": "completed",
            "type": "message"
        }
    ],

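For reference, a standalone sketch (a hypothetical helper mirroring the parser change above, not part of the diff) showing how the split behaves on a sample completion:

import re

def split_reasoning(model_output: str) -> tuple[str, str]:
    # Mirrors the parser logic: split on the first closing </think> tag,
    # keeping the tag with the reasoning text.
    match = re.search(r"</think>\s*", model_output)
    if not match:
        return model_output, ""
    end_idx = match.end()
    return model_output[:end_idx].strip(), model_output[end_idx:].strip()

reasoning, content = split_reasoning(
    "<think>\nThe user wants today's horoscope for Leo...\n</think>\n"
    '<tool_calls>\n{"name": "get_horoscope", "arguments": {"sign": "Leo"}}\n</tool_calls>'
)
# reasoning -> the <think>...</think> block (closing tag kept)
# content   -> the <tool_calls> block, ready for the tool-call parser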