From e6ad3ae2b6303eaeb17e477b613d81c503cd9bd0 Mon Sep 17 00:00:00 2001 From: Alec Solder Date: Wed, 10 Sep 2025 21:12:39 -0700 Subject: [PATCH 1/8] Adding the ability to use MCP tools for built in tools to pass through headers Signed-off-by: Alec Solder --- vllm/entrypoints/context.py | 15 +++++++++++---- vllm/entrypoints/openai/serving_responses.py | 10 +++++++--- vllm/entrypoints/tool_server.py | 17 +++++++++++++---- 3 files changed, 31 insertions(+), 11 deletions(-) diff --git a/vllm/entrypoints/context.py b/vllm/entrypoints/context.py index 9012639457ca..a88fbdcf7d63 100644 --- a/vllm/entrypoints/context.py +++ b/vllm/entrypoints/context.py @@ -310,14 +310,21 @@ async def call_python_tool(self, tool_session: Union["ClientSession", recipient=Role.ASSISTANT) ] - async def init_tool_sessions(self, tool_server: Optional[ToolServer], - exit_stack: AsyncExitStack, - request_id: str) -> None: + async def init_tool_sessions( + self, + tool_server: Optional[ToolServer], + exit_stack: AsyncExitStack, + request_id: str, + mcp_tool_headers: dict[str, dict[str, str]] = None) -> None: if tool_server: for tool_name in self.available_tools: if tool_name not in self._tool_sessions: tool_session = await exit_stack.enter_async_context( - tool_server.new_session(tool_name, request_id)) + tool_server.new_session( + tool_name, request_id, + mcp_tool_headers.get(tool_name) + if tool_name in mcp_tool_headers else {})) + logger.info("Created new session for %s", tool_name) self._tool_sessions[tool_name] = tool_session exit_stack.push_async_exit(self.cleanup_session) diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py index 401ba6c53331..f9a81466e164 100644 --- a/vllm/entrypoints/openai/serving_responses.py +++ b/vllm/entrypoints/openai/serving_responses.py @@ -714,9 +714,13 @@ def _construct_input_messages_with_harmony( # New conversation. reasoning_effort = (request.reasoning.effort if request.reasoning else None) - # Temporary: OpenAI types doesn't have container tool - # so we used MCP to cover that, up for change - tool_types = [tool.type for tool in request.tools] + # If there is a MCP tool with a matching server_label, then + # the tool is enabled. Native types like code_interpreter work + # as well + tool_types = [ + tool.server_label if tool.type == "mcp" else tool.type + for tool in request.tools + ] if envs.VLLM_GPT_OSS_USE_CONTAINER_TOOL: tool_types.append("container") enable_browser = ("web_search_preview" in tool_types diff --git a/vllm/entrypoints/tool_server.py b/vllm/entrypoints/tool_server.py index 056a571fb2fd..cbbb36994592 100644 --- a/vllm/entrypoints/tool_server.py +++ b/vllm/entrypoints/tool_server.py @@ -86,8 +86,12 @@ def get_tool_description(self, pass @abstractmethod - def new_session(self, tool_name: str, - session_id: str) -> AbstractAsyncContextManager[Any]: + def new_session( + self, + tool_name: str, + session_id: str, + headers: Optional[dict[str, str]] = None + ) -> AbstractAsyncContextManager[Any]: """ Create a session for the tool. 
""" @@ -144,11 +148,16 @@ def get_tool_description(self, tool_name: str): return self.harmony_tool_descriptions.get(tool_name) @asynccontextmanager - async def new_session(self, tool_name: str, session_id: str): + async def new_session(self, + tool_name: str, + session_id: str, + headers: Optional[dict[str, str]] = None): from mcp import ClientSession from mcp.client.sse import sse_client url = self.urls.get(tool_name) - headers = {"x-session-id": session_id} + if headers is None: + headers = {} + headers = {"x-session-id": session_id, **headers} if not url: raise KeyError(f"Tool '{tool_name}' is not supported") async with sse_client(url=url, From 69b7e0428580a672e958bc5116bea8d148c7ff92 Mon Sep 17 00:00:00 2001 From: Alec Solder Date: Mon, 15 Sep 2025 12:09:01 -0700 Subject: [PATCH 2/8] Adding one unit test which works locally, fixing type to name translation issues Signed-off-by: Alec Solder --- .../openai/test_response_api_with_harmony.py | 18 ++++++++ vllm/entrypoints/context.py | 42 ++++++++++++------- vllm/entrypoints/harmony_utils.py | 4 +- vllm/entrypoints/openai/serving_responses.py | 12 +++++- vllm/entrypoints/tool_server.py | 18 ++++---- 5 files changed, 69 insertions(+), 25 deletions(-) diff --git a/tests/entrypoints/openai/test_response_api_with_harmony.py b/tests/entrypoints/openai/test_response_api_with_harmony.py index 0d5836fab5a7..3df433766db5 100644 --- a/tests/entrypoints/openai/test_response_api_with_harmony.py +++ b/tests/entrypoints/openai/test_response_api_with_harmony.py @@ -373,6 +373,24 @@ async def test_code_interpreter(client: OpenAI, model_name: str): assert response.status == "completed" +@pytest.mark.asyncio +@pytest.mark.parametrize("model_name", [MODEL_NAME]) +@pytest.mark.skip(reason="Code interpreter tool is not available in CI yet.") +async def test_mcp_tool(client: OpenAI, model_name: str): + response = await client.responses.create( + model=model_name, + input="Please multiply 123 and 456 using the available python tool.", + tools=[{ + "type": "mcp", + "server_label": "code_interpreter", + # URL unused for DemoToolServer + "server_url": "http://localhost:8888" + }], + ) + assert response is not None + assert response.status == "completed" + + def get_weather(latitude, longitude): response = requests.get( f"https://api.open-meteo.com/v1/forecast?latitude={latitude}&longitude={longitude}¤t=temperature_2m,wind_speed_10m&hourly=temperature_2m,relative_humidity_2m,wind_speed_10m" # noqa diff --git a/vllm/entrypoints/context.py b/vllm/entrypoints/context.py index a88fbdcf7d63..42332ccec5f0 100644 --- a/vllm/entrypoints/context.py +++ b/vllm/entrypoints/context.py @@ -8,6 +8,7 @@ from contextlib import AsyncExitStack from typing import TYPE_CHECKING, Optional, Union +from openai.types.responses.tool import Mcp from openai_harmony import Author, Message, Role, StreamState, TextContent from vllm.entrypoints.harmony_utils import ( @@ -22,6 +23,21 @@ logger = logging.getLogger(__name__) +# This is currently needed as the tool type doesn't 1:1 match the +# tool namespace, which is what is used to look up the +# connection to the tool server +def _map_tool_name_to_tool_type(tool_name: str) -> str: + if tool_name == "browser": + return "web_search_preview" + elif tool_name == "python": + return "code_interpreter" + elif tool_name == "container": + return "container" + else: + raise ValueError( + f"Built in tool name not defined in mapping: {tool_name}") + + class TurnTokens: """Tracks token counts for a single conversation turn.""" @@ -59,8 +75,8 @@ def 
render_for_completion(self) -> list[int]: @abstractmethod async def init_tool_sessions(self, tool_server: Optional[ToolServer], - exit_stack: AsyncExitStack, - request_id: str) -> None: + exit_stack: AsyncExitStack, request_id: str, + mcp_tools: dict[str, Mcp]) -> None: pass @abstractmethod @@ -96,8 +112,8 @@ def render_for_completion(self) -> list[int]: raise NotImplementedError("Should not be called.") async def init_tool_sessions(self, tool_server: Optional[ToolServer], - exit_stack: AsyncExitStack, - request_id: str) -> None: + exit_stack: AsyncExitStack, request_id: str, + mcp_tools: dict[str, Mcp]) -> None: pass async def cleanup_session(self) -> None: @@ -310,20 +326,18 @@ async def call_python_tool(self, tool_session: Union["ClientSession", recipient=Role.ASSISTANT) ] - async def init_tool_sessions( - self, - tool_server: Optional[ToolServer], - exit_stack: AsyncExitStack, - request_id: str, - mcp_tool_headers: dict[str, dict[str, str]] = None) -> None: + async def init_tool_sessions(self, tool_server: Optional[ToolServer], + exit_stack: AsyncExitStack, request_id: str, + mcp_tools: dict[str, Mcp]): if tool_server: for tool_name in self.available_tools: if tool_name not in self._tool_sessions: + tool_type = _map_tool_name_to_tool_type(tool_name) + headers = mcp_tools[ + tool_type].headers if tool_type in mcp_tools else None tool_session = await exit_stack.enter_async_context( - tool_server.new_session( - tool_name, request_id, - mcp_tool_headers.get(tool_name) - if tool_name in mcp_tool_headers else {})) + tool_server.new_session(tool_name, request_id, + headers)) logger.info("Created new session for %s", tool_name) self._tool_sessions[tool_name] = tool_session exit_stack.push_async_exit(self.cleanup_session) diff --git a/vllm/entrypoints/harmony_utils.py b/vllm/entrypoints/harmony_utils.py index f7528ba81dce..23a2234274c0 100644 --- a/vllm/entrypoints/harmony_utils.py +++ b/vllm/entrypoints/harmony_utils.py @@ -126,8 +126,10 @@ def get_developer_message( function_tools: list[Union[Tool, ChatCompletionToolsParam]] = [] for tool in tools: if tool.type in ("web_search_preview", "code_interpreter", - "container"): + "container", "mcp"): # These are built-in tools that are added to the system message. 
+ # Adding in MCP for now until we support MCP tools executed + # server side pass elif tool.type == "function": diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py index f9a81466e164..402f2d643c70 100644 --- a/vllm/entrypoints/openai/serving_responses.py +++ b/vllm/entrypoints/openai/serving_responses.py @@ -451,8 +451,12 @@ async def responses_full_generator( async with AsyncExitStack() as exit_stack: try: + mcp_tools = { + tool.server_label: tool + for tool in request.tools if tool.type == "mcp" + } await context.init_tool_sessions(self.tool_server, exit_stack, - request.request_id) + request.request_id, mcp_tools) async for _ in result_generator: pass except asyncio.CancelledError: @@ -1635,8 +1639,12 @@ def _send_event(event: BaseModel): async with AsyncExitStack() as exit_stack: processer = None if self.use_harmony: + mcp_tools = { + tool.server_label: tool + for tool in request.tools if tool.type == "mcp" + } await context.init_tool_sessions(self.tool_server, exit_stack, - request.request_id) + request.request_id, mcp_tools) processer = self._process_harmony_streaming_events else: processer = self._process_simple_streaming_events diff --git a/vllm/entrypoints/tool_server.py b/vllm/entrypoints/tool_server.py index cbbb36994592..4c627b865ef9 100644 --- a/vllm/entrypoints/tool_server.py +++ b/vllm/entrypoints/tool_server.py @@ -18,7 +18,6 @@ async def list_server_and_tools(server_url: str): from mcp import ClientSession from mcp.client.sse import sse_client - async with sse_client(url=server_url) as streams, ClientSession( *streams) as session: initialize_response = await session.initialize() @@ -155,14 +154,14 @@ async def new_session(self, from mcp import ClientSession from mcp.client.sse import sse_client url = self.urls.get(tool_name) - if headers is None: - headers = {} - headers = {"x-session-id": session_id, **headers} + request_headers = {"x-session-id": session_id} + if headers is not None: + request_headers.update(headers) if not url: raise KeyError(f"Tool '{tool_name}' is not supported") - async with sse_client(url=url, - headers=headers) as streams, ClientSession( - *streams) as session: + async with sse_client( + url=url, headers=request_headers) as streams, ClientSession( + *streams) as session: await session.initialize() yield session @@ -198,7 +197,10 @@ def get_tool_description(self, raise ValueError(f"Unknown tool {tool_name}") @asynccontextmanager - async def new_session(self, tool_name: str, session_id: str): + async def new_session(self, + tool_name: str, + session_id: str, + headers: Optional[dict[str, str]] = None): if tool_name not in self.tools: raise KeyError(f"Tool '{tool_name}' is not supported") yield self.tools[tool_name] From f0553bed90f591b63da1eb97a9cfc7db0696464d Mon Sep 17 00:00:00 2001 From: Alec Solder Date: Mon, 15 Sep 2025 12:13:02 -0700 Subject: [PATCH 3/8] Removing log line Signed-off-by: Alec Solder --- vllm/entrypoints/context.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vllm/entrypoints/context.py b/vllm/entrypoints/context.py index 42332ccec5f0..f5449e9732f0 100644 --- a/vllm/entrypoints/context.py +++ b/vllm/entrypoints/context.py @@ -338,7 +338,6 @@ async def init_tool_sessions(self, tool_server: Optional[ToolServer], tool_session = await exit_stack.enter_async_context( tool_server.new_session(tool_name, request_id, headers)) - logger.info("Created new session for %s", tool_name) self._tool_sessions[tool_name] = tool_session exit_stack.push_async_exit(self.cleanup_session) From 
94162b022292669ca328751878a532f176ca8c17 Mon Sep 17 00:00:00 2001 From: Alec Solder Date: Tue, 16 Sep 2025 12:51:48 -0700 Subject: [PATCH 4/8] Enabling oss coding tool for tests Signed-off-by: Alec Solder --- .../openai/test_response_api_with_harmony.py | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/tests/entrypoints/openai/test_response_api_with_harmony.py b/tests/entrypoints/openai/test_response_api_with_harmony.py index 3df433766db5..9fb34316ec02 100644 --- a/tests/entrypoints/openai/test_response_api_with_harmony.py +++ b/tests/entrypoints/openai/test_response_api_with_harmony.py @@ -28,6 +28,8 @@ def server(monkeypatch_module: pytest.MonkeyPatch): with monkeypatch_module.context() as m: m.setenv("VLLM_ENABLE_RESPONSES_API_STORE", "1") + # This is necessary to test with code_interpreter on the demo server + m.setenv("PYTHON_EXECUTION_BACKEND", "dangerously_use_uv") with RemoteOpenAIServer(MODEL_NAME, args) as remote_server: yield remote_server @@ -357,11 +359,16 @@ async def test_web_search(client: OpenAI, model_name: str): @pytest.mark.asyncio @pytest.mark.parametrize("model_name", [MODEL_NAME]) -@pytest.mark.skip(reason="Code interpreter tool is not available in CI yet.") async def test_code_interpreter(client: OpenAI, model_name: str): response = await client.responses.create( model=model_name, - input="Multiply 64548*15151 using builtin python interpreter.", + # TODO: Ideally should be able to set max tool calls + # to prevent multi-turn, but it is not currently supported + # would speed up the test + input=("What's the first 4 digits after the decimal point of " + "cube root of `19910212 * 20250910`? " + "Show only the digits. The python interpreter is not stateful " + "and you must print to see the output."), tools=[{ "type": "code_interpreter", "container": { @@ -371,15 +378,21 @@ async def test_code_interpreter(client: OpenAI, model_name: str): ) assert response is not None assert response.status == "completed" + assert response.usage.output_tokens_details.tool_output_tokens > 0 @pytest.mark.asyncio @pytest.mark.parametrize("model_name", [MODEL_NAME]) -@pytest.mark.skip(reason="Code interpreter tool is not available in CI yet.") async def test_mcp_tool(client: OpenAI, model_name: str): response = await client.responses.create( model=model_name, - input="Please multiply 123 and 456 using the available python tool.", + # TODO: Ideally should be able to set max tool calls + # to prevent multi-turn, but it is not currently supported + # would speed up the test + input=("What's the first 4 digits after the decimal point of " + "cube root of `19910212 * 20250910`? " + "Show only the digits. 
The python interpreter is not stateful " + "and you must print to see the output."), tools=[{ "type": "mcp", "server_label": "code_interpreter", @@ -389,6 +402,7 @@ async def test_mcp_tool(client: OpenAI, model_name: str): ) assert response is not None assert response.status == "completed" + assert response.usage.output_tokens_details.tool_output_tokens > 0 def get_weather(latitude, longitude): From 8bc171d0c037b044f6d063c49c3b73f8b828a024 Mon Sep 17 00:00:00 2001 From: Alec Solder Date: Tue, 16 Sep 2025 16:10:32 -0700 Subject: [PATCH 5/8] adding the skip back Signed-off-by: Alec Solder --- tests/entrypoints/openai/test_response_api_with_harmony.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/entrypoints/openai/test_response_api_with_harmony.py b/tests/entrypoints/openai/test_response_api_with_harmony.py index 73d55589b636..b2e6cd8ae697 100644 --- a/tests/entrypoints/openai/test_response_api_with_harmony.py +++ b/tests/entrypoints/openai/test_response_api_with_harmony.py @@ -28,8 +28,6 @@ def server(monkeypatch_module: pytest.MonkeyPatch): with monkeypatch_module.context() as m: m.setenv("VLLM_ENABLE_RESPONSES_API_STORE", "1") - # This is necessary to test with code_interpreter on the demo server - m.setenv("PYTHON_EXECUTION_BACKEND", "dangerously_use_uv") with RemoteOpenAIServer(MODEL_NAME, args) as remote_server: yield remote_server @@ -396,6 +394,7 @@ async def test_web_search(client: OpenAI, model_name: str): @pytest.mark.asyncio @pytest.mark.parametrize("model_name", [MODEL_NAME]) +@pytest.mark.skip(reason="Code interpreter tool is not available in CI yet.") async def test_code_interpreter(client: OpenAI, model_name: str): response = await client.responses.create( model=model_name, @@ -420,6 +419,7 @@ async def test_code_interpreter(client: OpenAI, model_name: str): @pytest.mark.asyncio @pytest.mark.parametrize("model_name", [MODEL_NAME]) +@pytest.mark.skip(reason="Code interpreter tool is not available in CI yet.") async def test_mcp_tool(client: OpenAI, model_name: str): response = await client.responses.create( model=model_name, From aa572f5d80dfb3b18798b4b15a49e50f5c024550 Mon Sep 17 00:00:00 2001 From: Alec Solder Date: Fri, 19 Sep 2025 14:09:00 -0700 Subject: [PATCH 6/8] Moving MCP tests to separate file, adding env variable for allowing mcp tools to enable built in tools Signed-off-by: Alec Solder --- .../openai/test_response_api_mcp_tools.py | 107 ++++++++++++++++++ .../openai/test_response_api_with_harmony.py | 25 ---- vllm/entrypoints/openai/serving_responses.py | 19 ++-- vllm/envs.py | 10 +- 4 files changed, 122 insertions(+), 39 deletions(-) create mode 100644 tests/entrypoints/openai/test_response_api_mcp_tools.py diff --git a/tests/entrypoints/openai/test_response_api_mcp_tools.py b/tests/entrypoints/openai/test_response_api_mcp_tools.py new file mode 100644 index 000000000000..d24c557b79c0 --- /dev/null +++ b/tests/entrypoints/openai/test_response_api_mcp_tools.py @@ -0,0 +1,107 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import pytest +import pytest_asyncio +from openai import OpenAI + +from ...utils import RemoteOpenAIServer + +MODEL_NAME = "openai/gpt-oss-20b" + + +@pytest.fixture(scope="module") +def monkeypatch_module(): + from _pytest.monkeypatch import MonkeyPatch + mpatch = MonkeyPatch() + yield mpatch + mpatch.undo() + + +@pytest.fixture(scope="module") +def mcp_disabled_server(monkeypatch_module: pytest.MonkeyPatch): + args = ["--enforce-eager", 
"--tool-server", "demo"] + + with monkeypatch_module.context() as m: + m.setenv("VLLM_ENABLE_RESPONSES_API_STORE", "1") + m.setenv("PYTHON_EXECUTION_BACKEND", "dangerously_use_uv") + with RemoteOpenAIServer(MODEL_NAME, args) as remote_server: + yield remote_server + + +@pytest.fixture(scope="function") +def mcp_enabled_server(monkeypatch_module: pytest.MonkeyPatch): + args = ["--enforce-eager", "--tool-server", "demo"] + + with monkeypatch_module.context() as m: + m.setenv("VLLM_ENABLE_RESPONSES_API_STORE", "1") + m.setenv("PYTHON_EXECUTION_BACKEND", "dangerously_use_uv") + m.setenv("GPT_OSS_SYSTEM_TOOL_MCP_LABELS", + "code_interpreter,container") + with RemoteOpenAIServer(MODEL_NAME, args) as remote_server: + yield remote_server + + +@pytest_asyncio.fixture +async def mcp_disabled_client(mcp_disabled_server): + async with mcp_disabled_server.get_async_client() as async_client: + yield async_client + + +@pytest_asyncio.fixture +async def mcp_enabled_client(mcp_enabled_server): + async with mcp_enabled_server.get_async_client() as async_client: + yield async_client + + +@pytest.mark.asyncio +@pytest.mark.parametrize("model_name", [MODEL_NAME]) +@pytest.mark.skip(reason="Code interpreter tool is not available in CI yet.") +async def test_mcp_tool_env_flag_enabled(mcp_enabled_client: OpenAI, + model_name: str): + response = await mcp_enabled_client.responses.create( + model=model_name, + # TODO: Ideally should be able to set max tool calls + # to prevent multi-turn, but it is not currently supported + # would speed up the test + input=("What's the first 4 digits after the decimal point of " + "cube root of `19910212 * 20250910`? " + "Show only the digits. The python interpreter is not stateful " + "and you must print to see the output."), + tools=[{ + "type": "mcp", + "server_label": "code_interpreter", + # URL unused for DemoToolServer + "server_url": "http://localhost:8888" + }], + ) + assert response is not None + assert response.status == "completed" + assert response.usage.output_tokens_details.tool_output_tokens > 0 + + +@pytest.mark.asyncio +@pytest.mark.parametrize("model_name", [MODEL_NAME]) +@pytest.mark.skip(reason="Code interpreter tool is not available in CI yet.") +async def test_mcp_tool_env_flag_disabled( + mcp_disabled_client: OpenAI, model_name: str, + monkeypatch_module: pytest.MonkeyPatch): + response = await mcp_disabled_client.responses.create( + model=model_name, + # TODO: Ideally should be able to set max tool calls + # to prevent multi-turn, but it is not currently supported + # would speed up the test + input=("What's the first 4 digits after the decimal point of " + "cube root of `19910212 * 20250910`? " + "Show only the digits. 
The python interpreter is not stateful " + "and you must print to see the output."), + tools=[{ + "type": "mcp", + "server_label": "code_interpreter", + # URL unused for DemoToolServer + "server_url": "http://localhost:8888" + }], + ) + assert response is not None + assert response.status == "completed" + assert response.usage.output_tokens_details.tool_output_tokens == 0 diff --git a/tests/entrypoints/openai/test_response_api_with_harmony.py b/tests/entrypoints/openai/test_response_api_with_harmony.py index b2e6cd8ae697..b219b03d0795 100644 --- a/tests/entrypoints/openai/test_response_api_with_harmony.py +++ b/tests/entrypoints/openai/test_response_api_with_harmony.py @@ -417,31 +417,6 @@ async def test_code_interpreter(client: OpenAI, model_name: str): assert response.usage.output_tokens_details.tool_output_tokens > 0 -@pytest.mark.asyncio -@pytest.mark.parametrize("model_name", [MODEL_NAME]) -@pytest.mark.skip(reason="Code interpreter tool is not available in CI yet.") -async def test_mcp_tool(client: OpenAI, model_name: str): - response = await client.responses.create( - model=model_name, - # TODO: Ideally should be able to set max tool calls - # to prevent multi-turn, but it is not currently supported - # would speed up the test - input=("What's the first 4 digits after the decimal point of " - "cube root of `19910212 * 20250910`? " - "Show only the digits. The python interpreter is not stateful " - "and you must print to see the output."), - tools=[{ - "type": "mcp", - "server_label": "code_interpreter", - # URL unused for DemoToolServer - "server_url": "http://localhost:8888" - }], - ) - assert response is not None - assert response.status == "completed" - assert response.usage.output_tokens_details.tool_output_tokens > 0 - - def get_weather(latitude, longitude): response = requests.get( f"https://api.open-meteo.com/v1/forecast?latitude={latitude}&longitude={longitude}¤t=temperature_2m,wind_speed_10m&hourly=temperature_2m,relative_humidity_2m,wind_speed_10m" # noqa diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py index d8c7ebded97d..7a700d0bbee2 100644 --- a/vllm/entrypoints/openai/serving_responses.py +++ b/vllm/entrypoints/openai/serving_responses.py @@ -732,15 +732,16 @@ def _construct_input_messages_with_harmony( # New conversation. reasoning_effort = (request.reasoning.effort if request.reasoning else None) - # If there is a MCP tool with a matching server_label, then - # the tool is enabled. 
Native types like code_interpreter work - # as well - tool_types = [ - tool.server_label if tool.type == "mcp" else tool.type - for tool in request.tools - ] - if envs.VLLM_GPT_OSS_USE_CONTAINER_TOOL: - tool_types.append("container") + tool_types = [tool.type for tool in request.tools] + + # Allow the MCP Tool type to enable built in tools if the + # server_label is allowlisted in + # envs.GPT_OSS_SYSTEM_TOOL_MCP_LABELS + if envs.GPT_OSS_SYSTEM_TOOL_MCP_LABELS: + for tool in request.tools: + if (tool.type == "mcp" and tool.server_label + in envs.GPT_OSS_SYSTEM_TOOL_MCP_LABELS): + tool_types.append(tool.server_label) enable_browser = ("web_search_preview" in tool_types and self.tool_server is not None and self.tool_server.has_tool("browser")) diff --git a/vllm/envs.py b/vllm/envs.py index d2006979ea81..cbf99d853546 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -174,11 +174,11 @@ VLLM_ALLREDUCE_USE_SYMM_MEM: bool = False VLLM_TUNED_CONFIG_FOLDER: Optional[str] = None VLLM_DISABLE_PAD_FOR_CUDAGRAPH: bool = False - VLLM_GPT_OSS_USE_CONTAINER_TOOL: bool = False VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS: bool = False VLLM_CUSTOM_SCOPES_FOR_PROFILING: bool = False VLLM_KV_EVENTS_USE_INT_BLOCK_HASHES: bool = True VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME: str = "VLLM_OBJECT_STORAGE_SHM_BUFFER" + GPT_OSS_SYSTEM_TOOL_MCP_LABELS: list[str] = [] def get_default_cache_root(): @@ -1246,10 +1246,6 @@ def get_vllm_port() -> Optional[int]: "VLLM_TUNED_CONFIG_FOLDER": lambda: os.getenv("VLLM_TUNED_CONFIG_FOLDER", None), - # Allows vllm use container tool - "VLLM_GPT_OSS_USE_CONTAINER_TOOL": - lambda: bool(int(os.getenv("VLLM_GPT_OSS_USE_CONTAINER_TOOL", "0"))), - # Allows harmony instructions to be injected on system messages "VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS": lambda: bool( @@ -1269,6 +1265,10 @@ def get_vllm_port() -> Optional[int]: "VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME": lambda: os.getenv("VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME", "VLLM_OBJECT_STORAGE_SHM_BUFFER"), + + "GPT_OSS_SYSTEM_TOOL_MCP_LABELS": + lambda: ([] if "GPT_OSS_SYSTEM_TOOL_MCP_LABELS" not in os.environ + else os.environ["GPT_OSS_SYSTEM_TOOL_MCP_LABELS"].split(",")), } # --8<-- [end:env-vars-definition] From 0bd5e5359acee9e1d49a84c324be42dd69553a47 Mon Sep 17 00:00:00 2001 From: Alec Solder Date: Fri, 19 Sep 2025 15:38:52 -0700 Subject: [PATCH 7/8] Fixing nits and adding env variable validation Signed-off-by: Alec Solder --- .../openai/test_response_api_mcp_tools.py | 5 +- vllm/entrypoints/context.py | 21 ++++--- vllm/envs.py | 60 ++++++++++++++++++- 3 files changed, 72 insertions(+), 14 deletions(-) diff --git a/tests/entrypoints/openai/test_response_api_mcp_tools.py b/tests/entrypoints/openai/test_response_api_mcp_tools.py index d24c557b79c0..b0eb84712c19 100644 --- a/tests/entrypoints/openai/test_response_api_mcp_tools.py +++ b/tests/entrypoints/openai/test_response_api_mcp_tools.py @@ -83,9 +83,8 @@ async def test_mcp_tool_env_flag_enabled(mcp_enabled_client: OpenAI, @pytest.mark.asyncio @pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.skip(reason="Code interpreter tool is not available in CI yet.") -async def test_mcp_tool_env_flag_disabled( - mcp_disabled_client: OpenAI, model_name: str, - monkeypatch_module: pytest.MonkeyPatch): +async def test_mcp_tool_env_flag_disabled(mcp_disabled_client: OpenAI, + model_name: str): response = await mcp_disabled_client.responses.create( model=model_name, # TODO: Ideally should be able to set max tool calls diff --git a/vllm/entrypoints/context.py b/vllm/entrypoints/context.py 
index 793d8f63083c..ea81fdbcd825 100644 --- a/vllm/entrypoints/context.py +++ b/vllm/entrypoints/context.py @@ -22,20 +22,23 @@ logger = logging.getLogger(__name__) - # This is currently needed as the tool type doesn't 1:1 match the # tool namespace, which is what is used to look up the # connection to the tool server +_TOOL_NAME_TO_TYPE_MAP = { + "browser": "web_search_preview", + "python": "code_interpreter", + "container": "container", +} + + def _map_tool_name_to_tool_type(tool_name: str) -> str: - if tool_name == "browser": - return "web_search_preview" - elif tool_name == "python": - return "code_interpreter" - elif tool_name == "container": - return "container" - else: + if tool_name not in _TOOL_NAME_TO_TYPE_MAP: + available_tools = ', '.join(_TOOL_NAME_TO_TYPE_MAP.keys()) raise ValueError( - f"Built in tool name not defined in mapping: {tool_name}") + f"Built-in tool name '{tool_name}' not defined in mapping. " + f"Available tools: {available_tools}") + return _TOOL_NAME_TO_TYPE_MAP[tool_name] class TurnTokens: diff --git a/vllm/envs.py b/vllm/envs.py index c5e29419ecc8..c71597054287 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -257,6 +257,58 @@ def _get_validated_env() -> Optional[str]: return _get_validated_env +def env_list_with_choices( + env_name: str, + default: list[str], + choices: Union[list[str], Callable[[], list[str]]], + case_sensitive: bool = True) -> Callable[[], list[str]]: + """ + Create a lambda that validates environment variable + containing comma-separated values against allowed choices + + Args: + env_name: Name of the environment variable + default: Default list of values if not set + choices: List of valid string options or callable that returns list + case_sensitive: Whether validation should be case sensitive + + Returns: + Lambda function for environment_variables + dict that returns list of strings + """ + + def _get_validated_env_list() -> list[str]: + value = os.getenv(env_name) + if value is None: + return default + + # Split comma-separated values and strip whitespace + values = [v.strip() for v in value.split(",") if v.strip()] + + if not values: + return default + + # Resolve choices if it's a callable (for lazy loading) + actual_choices = choices() if callable(choices) else choices + + # Validate each value + for val in values: + if not case_sensitive: + check_value = val.lower() + check_choices = [choice.lower() for choice in actual_choices] + else: + check_value = val + check_choices = actual_choices + + if check_value not in check_choices: + raise ValueError(f"Invalid value '{val}' in {env_name}. " + f"Valid options: {actual_choices}.") + + return values + + return _get_validated_env_list + + def get_vllm_port() -> Optional[int]: """Get the port from VLLM_PORT environment variable. 
@@ -1329,9 +1381,13 @@ def get_vllm_port() -> Optional[int]: lambda: os.getenv("VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME", "VLLM_OBJECT_STORAGE_SHM_BUFFER"), + # Valid values are container,code_interpreter,web_search_preview + # ex GPT_OSS_SYSTEM_TOOL_MCP_LABELS=container,code_interpreter "GPT_OSS_SYSTEM_TOOL_MCP_LABELS": - lambda: ([] if "GPT_OSS_SYSTEM_TOOL_MCP_LABELS" not in os.environ - else os.environ["GPT_OSS_SYSTEM_TOOL_MCP_LABELS"].split(",")), + env_list_with_choices("GPT_OSS_SYSTEM_TOOL_MCP_LABELS", [], + ["container", + "code_interpreter", + "web_search_preview"]), } # --8<-- [end:env-vars-definition] From 823414a511f35e85d9417e728cbff95f371d8b89 Mon Sep 17 00:00:00 2001 From: Alec Solder Date: Mon, 22 Sep 2025 13:03:40 -0700 Subject: [PATCH 8/8] Adding tests for env variable parsing Signed-off-by: Alec Solder --- tests/test_envs.py | 216 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 216 insertions(+) create mode 100644 tests/test_envs.py diff --git a/tests/test_envs.py b/tests/test_envs.py new file mode 100644 index 000000000000..f81a6e2e415c --- /dev/null +++ b/tests/test_envs.py @@ -0,0 +1,216 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import os +from unittest.mock import patch + +import pytest + +from vllm.envs import env_list_with_choices, env_with_choices + + +class TestEnvWithChoices: + """Test cases for env_with_choices function.""" + + def test_default_value_returned_when_env_not_set(self): + """Test default is returned when env var is not set.""" + env_func = env_with_choices("NONEXISTENT_ENV", "default", + ["option1", "option2"]) + assert env_func() == "default" + + def test_none_default_returned_when_env_not_set(self): + """Test that None is returned when env not set and default is None.""" + env_func = env_with_choices("NONEXISTENT_ENV", None, + ["option1", "option2"]) + assert env_func() is None + + def test_valid_value_returned_case_sensitive(self): + """Test that valid value is returned in case sensitive mode.""" + with patch.dict(os.environ, {"TEST_ENV": "option1"}): + env_func = env_with_choices("TEST_ENV", + "default", ["option1", "option2"], + case_sensitive=True) + assert env_func() == "option1" + + def test_valid_lowercase_value_returned_case_insensitive(self): + """Test that lowercase value is accepted in case insensitive mode.""" + with patch.dict(os.environ, {"TEST_ENV": "option1"}): + env_func = env_with_choices("TEST_ENV", + "default", ["OPTION1", "OPTION2"], + case_sensitive=False) + assert env_func() == "option1" + + def test_valid_uppercase_value_returned_case_insensitive(self): + """Test that uppercase value is accepted in case insensitive mode.""" + with patch.dict(os.environ, {"TEST_ENV": "OPTION1"}): + env_func = env_with_choices("TEST_ENV", + "default", ["option1", "option2"], + case_sensitive=False) + assert env_func() == "OPTION1" + + def test_invalid_value_raises_error_case_sensitive(self): + """Test that invalid value raises ValueError in case sensitive mode.""" + with patch.dict(os.environ, {"TEST_ENV": "invalid"}): + env_func = env_with_choices("TEST_ENV", + "default", ["option1", "option2"], + case_sensitive=True) + with pytest.raises(ValueError, + match="Invalid value 'invalid' for TEST_ENV"): + env_func() + + def test_case_mismatch_raises_error_case_sensitive(self): + """Test that case mismatch raises ValueError in case sensitive mode.""" + with patch.dict(os.environ, {"TEST_ENV": "OPTION1"}): + env_func = env_with_choices("TEST_ENV", + "default", 
["option1", "option2"], + case_sensitive=True) + with pytest.raises(ValueError, + match="Invalid value 'OPTION1' for TEST_ENV"): + env_func() + + def test_invalid_value_raises_error_case_insensitive(self): + """Test that invalid value raises ValueError when case insensitive.""" + with patch.dict(os.environ, {"TEST_ENV": "invalid"}): + env_func = env_with_choices("TEST_ENV", + "default", ["option1", "option2"], + case_sensitive=False) + with pytest.raises(ValueError, + match="Invalid value 'invalid' for TEST_ENV"): + env_func() + + def test_callable_choices_resolved_correctly(self): + """Test that callable choices are resolved correctly.""" + + def get_choices(): + return ["dynamic1", "dynamic2"] + + with patch.dict(os.environ, {"TEST_ENV": "dynamic1"}): + env_func = env_with_choices("TEST_ENV", "default", get_choices) + assert env_func() == "dynamic1" + + def test_callable_choices_with_invalid_value(self): + """Test that callable choices raise error for invalid values.""" + + def get_choices(): + return ["dynamic1", "dynamic2"] + + with patch.dict(os.environ, {"TEST_ENV": "invalid"}): + env_func = env_with_choices("TEST_ENV", "default", get_choices) + with pytest.raises(ValueError, + match="Invalid value 'invalid' for TEST_ENV"): + env_func() + + +class TestEnvListWithChoices: + """Test cases for env_list_with_choices function.""" + + def test_default_list_returned_when_env_not_set(self): + """Test that default list is returned when env var is not set.""" + env_func = env_list_with_choices("NONEXISTENT_ENV", + ["default1", "default2"], + ["option1", "option2"]) + assert env_func() == ["default1", "default2"] + + def test_empty_default_list_returned_when_env_not_set(self): + """Test that empty default list is returned when env not set.""" + env_func = env_list_with_choices("NONEXISTENT_ENV", [], + ["option1", "option2"]) + assert env_func() == [] + + def test_single_valid_value_parsed_correctly(self): + """Test that single valid value is parsed correctly.""" + with patch.dict(os.environ, {"TEST_ENV": "option1"}): + env_func = env_list_with_choices("TEST_ENV", [], + ["option1", "option2"]) + assert env_func() == ["option1"] + + def test_multiple_valid_values_parsed_correctly(self): + """Test that multiple valid values are parsed correctly.""" + with patch.dict(os.environ, {"TEST_ENV": "option1,option2"}): + env_func = env_list_with_choices("TEST_ENV", [], + ["option1", "option2"]) + assert env_func() == ["option1", "option2"] + + def test_values_with_whitespace_trimmed(self): + """Test that values with whitespace are trimmed correctly.""" + with patch.dict(os.environ, {"TEST_ENV": " option1 , option2 "}): + env_func = env_list_with_choices("TEST_ENV", [], + ["option1", "option2"]) + assert env_func() == ["option1", "option2"] + + def test_empty_values_filtered_out(self): + """Test that empty values are filtered out.""" + with patch.dict(os.environ, {"TEST_ENV": "option1,,option2,"}): + env_func = env_list_with_choices("TEST_ENV", [], + ["option1", "option2"]) + assert env_func() == ["option1", "option2"] + + def test_empty_string_returns_default(self): + """Test that empty string returns default.""" + with patch.dict(os.environ, {"TEST_ENV": ""}): + env_func = env_list_with_choices("TEST_ENV", ["default"], + ["option1", "option2"]) + assert env_func() == ["default"] + + def test_only_commas_returns_default(self): + """Test that string with only commas returns default.""" + with patch.dict(os.environ, {"TEST_ENV": ",,,"}): + env_func = env_list_with_choices("TEST_ENV", ["default"], + 
["option1", "option2"]) + assert env_func() == ["default"] + + def test_case_sensitive_validation(self): + """Test case sensitive validation.""" + with patch.dict(os.environ, {"TEST_ENV": "option1,OPTION2"}): + env_func = env_list_with_choices("TEST_ENV", [], + ["option1", "option2"], + case_sensitive=True) + with pytest.raises(ValueError, + match="Invalid value 'OPTION2' in TEST_ENV"): + env_func() + + def test_case_insensitive_validation(self): + """Test case insensitive validation.""" + with patch.dict(os.environ, {"TEST_ENV": "OPTION1,option2"}): + env_func = env_list_with_choices("TEST_ENV", [], + ["option1", "option2"], + case_sensitive=False) + assert env_func() == ["OPTION1", "option2"] + + def test_invalid_value_in_list_raises_error(self): + """Test that invalid value in list raises ValueError.""" + with patch.dict(os.environ, {"TEST_ENV": "option1,invalid,option2"}): + env_func = env_list_with_choices("TEST_ENV", [], + ["option1", "option2"]) + with pytest.raises(ValueError, + match="Invalid value 'invalid' in TEST_ENV"): + env_func() + + def test_callable_choices_resolved_correctly(self): + """Test that callable choices are resolved correctly.""" + + def get_choices(): + return ["dynamic1", "dynamic2"] + + with patch.dict(os.environ, {"TEST_ENV": "dynamic1,dynamic2"}): + env_func = env_list_with_choices("TEST_ENV", [], get_choices) + assert env_func() == ["dynamic1", "dynamic2"] + + def test_callable_choices_with_invalid_value(self): + """Test that callable choices raise error for invalid values.""" + + def get_choices(): + return ["dynamic1", "dynamic2"] + + with patch.dict(os.environ, {"TEST_ENV": "dynamic1,invalid"}): + env_func = env_list_with_choices("TEST_ENV", [], get_choices) + with pytest.raises(ValueError, + match="Invalid value 'invalid' in TEST_ENV"): + env_func() + + def test_duplicate_values_preserved(self): + """Test that duplicate values in the list are preserved.""" + with patch.dict(os.environ, {"TEST_ENV": "option1,option1,option2"}): + env_func = env_list_with_choices("TEST_ENV", [], + ["option1", "option2"]) + assert env_func() == ["option1", "option1", "option2"]