From e6ad3ae2b6303eaeb17e477b613d81c503cd9bd0 Mon Sep 17 00:00:00 2001 From: Alec Solder Date: Wed, 10 Sep 2025 21:12:39 -0700 Subject: [PATCH 1/8] Adding the ability to use MCP tools for built in tools to pass through headers Signed-off-by: Alec Solder --- vllm/entrypoints/context.py | 15 +++++++++++---- vllm/entrypoints/openai/serving_responses.py | 10 +++++++--- vllm/entrypoints/tool_server.py | 17 +++++++++++++---- 3 files changed, 31 insertions(+), 11 deletions(-) diff --git a/vllm/entrypoints/context.py b/vllm/entrypoints/context.py index 9012639457ca..a88fbdcf7d63 100644 --- a/vllm/entrypoints/context.py +++ b/vllm/entrypoints/context.py @@ -310,14 +310,21 @@ async def call_python_tool(self, tool_session: Union["ClientSession", recipient=Role.ASSISTANT) ] - async def init_tool_sessions(self, tool_server: Optional[ToolServer], - exit_stack: AsyncExitStack, - request_id: str) -> None: + async def init_tool_sessions( + self, + tool_server: Optional[ToolServer], + exit_stack: AsyncExitStack, + request_id: str, + mcp_tool_headers: dict[str, dict[str, str]] = None) -> None: if tool_server: for tool_name in self.available_tools: if tool_name not in self._tool_sessions: tool_session = await exit_stack.enter_async_context( - tool_server.new_session(tool_name, request_id)) + tool_server.new_session( + tool_name, request_id, + mcp_tool_headers.get(tool_name) + if tool_name in mcp_tool_headers else {})) + logger.info("Created new session for %s", tool_name) self._tool_sessions[tool_name] = tool_session exit_stack.push_async_exit(self.cleanup_session) diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py index 401ba6c53331..f9a81466e164 100644 --- a/vllm/entrypoints/openai/serving_responses.py +++ b/vllm/entrypoints/openai/serving_responses.py @@ -714,9 +714,13 @@ def _construct_input_messages_with_harmony( # New conversation. reasoning_effort = (request.reasoning.effort if request.reasoning else None) - # Temporary: OpenAI types doesn't have container tool - # so we used MCP to cover that, up for change - tool_types = [tool.type for tool in request.tools] + # If there is a MCP tool with a matching server_label, then + # the tool is enabled. Native types like code_interpreter work + # as well + tool_types = [ + tool.server_label if tool.type == "mcp" else tool.type + for tool in request.tools + ] if envs.VLLM_GPT_OSS_USE_CONTAINER_TOOL: tool_types.append("container") enable_browser = ("web_search_preview" in tool_types diff --git a/vllm/entrypoints/tool_server.py b/vllm/entrypoints/tool_server.py index 056a571fb2fd..cbbb36994592 100644 --- a/vllm/entrypoints/tool_server.py +++ b/vllm/entrypoints/tool_server.py @@ -86,8 +86,12 @@ def get_tool_description(self, pass @abstractmethod - def new_session(self, tool_name: str, - session_id: str) -> AbstractAsyncContextManager[Any]: + def new_session( + self, + tool_name: str, + session_id: str, + headers: Optional[dict[str, str]] = None + ) -> AbstractAsyncContextManager[Any]: """ Create a session for the tool. 
""" @@ -144,11 +148,16 @@ def get_tool_description(self, tool_name: str): return self.harmony_tool_descriptions.get(tool_name) @asynccontextmanager - async def new_session(self, tool_name: str, session_id: str): + async def new_session(self, + tool_name: str, + session_id: str, + headers: Optional[dict[str, str]] = None): from mcp import ClientSession from mcp.client.sse import sse_client url = self.urls.get(tool_name) - headers = {"x-session-id": session_id} + if headers is None: + headers = {} + headers = {"x-session-id": session_id, **headers} if not url: raise KeyError(f"Tool '{tool_name}' is not supported") async with sse_client(url=url, From 69b7e0428580a672e958bc5116bea8d148c7ff92 Mon Sep 17 00:00:00 2001 From: Alec Solder Date: Mon, 15 Sep 2025 12:09:01 -0700 Subject: [PATCH 2/8] Adding one unit test which works locally, fixing type to name translation issues Signed-off-by: Alec Solder --- .../openai/test_response_api_with_harmony.py | 18 ++++++++ vllm/entrypoints/context.py | 42 ++++++++++++------- vllm/entrypoints/harmony_utils.py | 4 +- vllm/entrypoints/openai/serving_responses.py | 12 +++++- vllm/entrypoints/tool_server.py | 18 ++++---- 5 files changed, 69 insertions(+), 25 deletions(-) diff --git a/tests/entrypoints/openai/test_response_api_with_harmony.py b/tests/entrypoints/openai/test_response_api_with_harmony.py index 0d5836fab5a7..3df433766db5 100644 --- a/tests/entrypoints/openai/test_response_api_with_harmony.py +++ b/tests/entrypoints/openai/test_response_api_with_harmony.py @@ -373,6 +373,24 @@ async def test_code_interpreter(client: OpenAI, model_name: str): assert response.status == "completed" +@pytest.mark.asyncio +@pytest.mark.parametrize("model_name", [MODEL_NAME]) +@pytest.mark.skip(reason="Code interpreter tool is not available in CI yet.") +async def test_mcp_tool(client: OpenAI, model_name: str): + response = await client.responses.create( + model=model_name, + input="Please multiply 123 and 456 using the available python tool.", + tools=[{ + "type": "mcp", + "server_label": "code_interpreter", + # URL unused for DemoToolServer + "server_url": "http://localhost:8888" + }], + ) + assert response is not None + assert response.status == "completed" + + def get_weather(latitude, longitude): response = requests.get( f"https://api.open-meteo.com/v1/forecast?latitude={latitude}&longitude={longitude}¤t=temperature_2m,wind_speed_10m&hourly=temperature_2m,relative_humidity_2m,wind_speed_10m" # noqa diff --git a/vllm/entrypoints/context.py b/vllm/entrypoints/context.py index a88fbdcf7d63..42332ccec5f0 100644 --- a/vllm/entrypoints/context.py +++ b/vllm/entrypoints/context.py @@ -8,6 +8,7 @@ from contextlib import AsyncExitStack from typing import TYPE_CHECKING, Optional, Union +from openai.types.responses.tool import Mcp from openai_harmony import Author, Message, Role, StreamState, TextContent from vllm.entrypoints.harmony_utils import ( @@ -22,6 +23,21 @@ logger = logging.getLogger(__name__) +# This is currently needed as the tool type doesn't 1:1 match the +# tool namespace, which is what is used to look up the +# connection to the tool server +def _map_tool_name_to_tool_type(tool_name: str) -> str: + if tool_name == "browser": + return "web_search_preview" + elif tool_name == "python": + return "code_interpreter" + elif tool_name == "container": + return "container" + else: + raise ValueError( + f"Built in tool name not defined in mapping: {tool_name}") + + class TurnTokens: """Tracks token counts for a single conversation turn.""" @@ -59,8 +75,8 @@ def 
render_for_completion(self) -> list[int]: @abstractmethod async def init_tool_sessions(self, tool_server: Optional[ToolServer], - exit_stack: AsyncExitStack, - request_id: str) -> None: + exit_stack: AsyncExitStack, request_id: str, + mcp_tools: dict[str, Mcp]) -> None: pass @abstractmethod @@ -96,8 +112,8 @@ def render_for_completion(self) -> list[int]: raise NotImplementedError("Should not be called.") async def init_tool_sessions(self, tool_server: Optional[ToolServer], - exit_stack: AsyncExitStack, - request_id: str) -> None: + exit_stack: AsyncExitStack, request_id: str, + mcp_tools: dict[str, Mcp]) -> None: pass async def cleanup_session(self) -> None: @@ -310,20 +326,18 @@ async def call_python_tool(self, tool_session: Union["ClientSession", recipient=Role.ASSISTANT) ] - async def init_tool_sessions( - self, - tool_server: Optional[ToolServer], - exit_stack: AsyncExitStack, - request_id: str, - mcp_tool_headers: dict[str, dict[str, str]] = None) -> None: + async def init_tool_sessions(self, tool_server: Optional[ToolServer], + exit_stack: AsyncExitStack, request_id: str, + mcp_tools: dict[str, Mcp]): if tool_server: for tool_name in self.available_tools: if tool_name not in self._tool_sessions: + tool_type = _map_tool_name_to_tool_type(tool_name) + headers = mcp_tools[ + tool_type].headers if tool_type in mcp_tools else None tool_session = await exit_stack.enter_async_context( - tool_server.new_session( - tool_name, request_id, - mcp_tool_headers.get(tool_name) - if tool_name in mcp_tool_headers else {})) + tool_server.new_session(tool_name, request_id, + headers)) logger.info("Created new session for %s", tool_name) self._tool_sessions[tool_name] = tool_session exit_stack.push_async_exit(self.cleanup_session) diff --git a/vllm/entrypoints/harmony_utils.py b/vllm/entrypoints/harmony_utils.py index f7528ba81dce..23a2234274c0 100644 --- a/vllm/entrypoints/harmony_utils.py +++ b/vllm/entrypoints/harmony_utils.py @@ -126,8 +126,10 @@ def get_developer_message( function_tools: list[Union[Tool, ChatCompletionToolsParam]] = [] for tool in tools: if tool.type in ("web_search_preview", "code_interpreter", - "container"): + "container", "mcp"): # These are built-in tools that are added to the system message. 
+ # Adding in MCP for now until we support MCP tools executed + # server side pass elif tool.type == "function": diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py index f9a81466e164..402f2d643c70 100644 --- a/vllm/entrypoints/openai/serving_responses.py +++ b/vllm/entrypoints/openai/serving_responses.py @@ -451,8 +451,12 @@ async def responses_full_generator( async with AsyncExitStack() as exit_stack: try: + mcp_tools = { + tool.server_label: tool + for tool in request.tools if tool.type == "mcp" + } await context.init_tool_sessions(self.tool_server, exit_stack, - request.request_id) + request.request_id, mcp_tools) async for _ in result_generator: pass except asyncio.CancelledError: @@ -1635,8 +1639,12 @@ def _send_event(event: BaseModel): async with AsyncExitStack() as exit_stack: processer = None if self.use_harmony: + mcp_tools = { + tool.server_label: tool + for tool in request.tools if tool.type == "mcp" + } await context.init_tool_sessions(self.tool_server, exit_stack, - request.request_id) + request.request_id, mcp_tools) processer = self._process_harmony_streaming_events else: processer = self._process_simple_streaming_events diff --git a/vllm/entrypoints/tool_server.py b/vllm/entrypoints/tool_server.py index cbbb36994592..4c627b865ef9 100644 --- a/vllm/entrypoints/tool_server.py +++ b/vllm/entrypoints/tool_server.py @@ -18,7 +18,6 @@ async def list_server_and_tools(server_url: str): from mcp import ClientSession from mcp.client.sse import sse_client - async with sse_client(url=server_url) as streams, ClientSession( *streams) as session: initialize_response = await session.initialize() @@ -155,14 +154,14 @@ async def new_session(self, from mcp import ClientSession from mcp.client.sse import sse_client url = self.urls.get(tool_name) - if headers is None: - headers = {} - headers = {"x-session-id": session_id, **headers} + request_headers = {"x-session-id": session_id} + if headers is not None: + request_headers.update(headers) if not url: raise KeyError(f"Tool '{tool_name}' is not supported") - async with sse_client(url=url, - headers=headers) as streams, ClientSession( - *streams) as session: + async with sse_client( + url=url, headers=request_headers) as streams, ClientSession( + *streams) as session: await session.initialize() yield session @@ -198,7 +197,10 @@ def get_tool_description(self, raise ValueError(f"Unknown tool {tool_name}") @asynccontextmanager - async def new_session(self, tool_name: str, session_id: str): + async def new_session(self, + tool_name: str, + session_id: str, + headers: Optional[dict[str, str]] = None): if tool_name not in self.tools: raise KeyError(f"Tool '{tool_name}' is not supported") yield self.tools[tool_name] From f0553bed90f591b63da1eb97a9cfc7db0696464d Mon Sep 17 00:00:00 2001 From: Alec Solder Date: Mon, 15 Sep 2025 12:13:02 -0700 Subject: [PATCH 3/8] Removing log line Signed-off-by: Alec Solder --- vllm/entrypoints/context.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vllm/entrypoints/context.py b/vllm/entrypoints/context.py index 42332ccec5f0..f5449e9732f0 100644 --- a/vllm/entrypoints/context.py +++ b/vllm/entrypoints/context.py @@ -338,7 +338,6 @@ async def init_tool_sessions(self, tool_server: Optional[ToolServer], tool_session = await exit_stack.enter_async_context( tool_server.new_session(tool_name, request_id, headers)) - logger.info("Created new session for %s", tool_name) self._tool_sessions[tool_name] = tool_session exit_stack.push_async_exit(self.cleanup_session) From 
94162b022292669ca328751878a532f176ca8c17 Mon Sep 17 00:00:00 2001 From: Alec Solder Date: Tue, 16 Sep 2025 12:51:48 -0700 Subject: [PATCH 4/8] Enabling oss coding tool for tests Signed-off-by: Alec Solder --- .../openai/test_response_api_with_harmony.py | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/tests/entrypoints/openai/test_response_api_with_harmony.py b/tests/entrypoints/openai/test_response_api_with_harmony.py index 3df433766db5..9fb34316ec02 100644 --- a/tests/entrypoints/openai/test_response_api_with_harmony.py +++ b/tests/entrypoints/openai/test_response_api_with_harmony.py @@ -28,6 +28,8 @@ def server(monkeypatch_module: pytest.MonkeyPatch): with monkeypatch_module.context() as m: m.setenv("VLLM_ENABLE_RESPONSES_API_STORE", "1") + # This is necessary to test with code_interpreter on the demo server + m.setenv("PYTHON_EXECUTION_BACKEND", "dangerously_use_uv") with RemoteOpenAIServer(MODEL_NAME, args) as remote_server: yield remote_server @@ -357,11 +359,16 @@ async def test_web_search(client: OpenAI, model_name: str): @pytest.mark.asyncio @pytest.mark.parametrize("model_name", [MODEL_NAME]) -@pytest.mark.skip(reason="Code interpreter tool is not available in CI yet.") async def test_code_interpreter(client: OpenAI, model_name: str): response = await client.responses.create( model=model_name, - input="Multiply 64548*15151 using builtin python interpreter.", + # TODO: Ideally should be able to set max tool calls + # to prevent multi-turn, but it is not currently supported + # would speed up the test + input=("What's the first 4 digits after the decimal point of " + "cube root of `19910212 * 20250910`? " + "Show only the digits. The python interpreter is not stateful " + "and you must print to see the output."), tools=[{ "type": "code_interpreter", "container": { @@ -371,15 +378,21 @@ async def test_code_interpreter(client: OpenAI, model_name: str): ) assert response is not None assert response.status == "completed" + assert response.usage.output_tokens_details.tool_output_tokens > 0 @pytest.mark.asyncio @pytest.mark.parametrize("model_name", [MODEL_NAME]) -@pytest.mark.skip(reason="Code interpreter tool is not available in CI yet.") async def test_mcp_tool(client: OpenAI, model_name: str): response = await client.responses.create( model=model_name, - input="Please multiply 123 and 456 using the available python tool.", + # TODO: Ideally should be able to set max tool calls + # to prevent multi-turn, but it is not currently supported + # would speed up the test + input=("What's the first 4 digits after the decimal point of " + "cube root of `19910212 * 20250910`? " + "Show only the digits. 
The python interpreter is not stateful " + "and you must print to see the output."), tools=[{ "type": "mcp", "server_label": "code_interpreter", @@ -389,6 +402,7 @@ async def test_mcp_tool(client: OpenAI, model_name: str): ) assert response is not None assert response.status == "completed" + assert response.usage.output_tokens_details.tool_output_tokens > 0 def get_weather(latitude, longitude): From 8bc171d0c037b044f6d063c49c3b73f8b828a024 Mon Sep 17 00:00:00 2001 From: Alec Solder Date: Tue, 16 Sep 2025 16:10:32 -0700 Subject: [PATCH 5/8] adding the skip back Signed-off-by: Alec Solder --- tests/entrypoints/openai/test_response_api_with_harmony.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/entrypoints/openai/test_response_api_with_harmony.py b/tests/entrypoints/openai/test_response_api_with_harmony.py index 73d55589b636..b2e6cd8ae697 100644 --- a/tests/entrypoints/openai/test_response_api_with_harmony.py +++ b/tests/entrypoints/openai/test_response_api_with_harmony.py @@ -28,8 +28,6 @@ def server(monkeypatch_module: pytest.MonkeyPatch): with monkeypatch_module.context() as m: m.setenv("VLLM_ENABLE_RESPONSES_API_STORE", "1") - # This is necessary to test with code_interpreter on the demo server - m.setenv("PYTHON_EXECUTION_BACKEND", "dangerously_use_uv") with RemoteOpenAIServer(MODEL_NAME, args) as remote_server: yield remote_server @@ -396,6 +394,7 @@ async def test_web_search(client: OpenAI, model_name: str): @pytest.mark.asyncio @pytest.mark.parametrize("model_name", [MODEL_NAME]) +@pytest.mark.skip(reason="Code interpreter tool is not available in CI yet.") async def test_code_interpreter(client: OpenAI, model_name: str): response = await client.responses.create( model=model_name, @@ -420,6 +419,7 @@ async def test_code_interpreter(client: OpenAI, model_name: str): @pytest.mark.asyncio @pytest.mark.parametrize("model_name", [MODEL_NAME]) +@pytest.mark.skip(reason="Code interpreter tool is not available in CI yet.") async def test_mcp_tool(client: OpenAI, model_name: str): response = await client.responses.create( model=model_name, From aa572f5d80dfb3b18798b4b15a49e50f5c024550 Mon Sep 17 00:00:00 2001 From: Alec Solder Date: Fri, 19 Sep 2025 14:09:00 -0700 Subject: [PATCH 6/8] Moving MCP tests to separate file, adding env variable for allowing mcp tools to enable built in tools Signed-off-by: Alec Solder --- .../openai/test_response_api_mcp_tools.py | 107 ++++++++++++++++++ .../openai/test_response_api_with_harmony.py | 25 ---- vllm/entrypoints/openai/serving_responses.py | 19 ++-- vllm/envs.py | 10 +- 4 files changed, 122 insertions(+), 39 deletions(-) create mode 100644 tests/entrypoints/openai/test_response_api_mcp_tools.py diff --git a/tests/entrypoints/openai/test_response_api_mcp_tools.py b/tests/entrypoints/openai/test_response_api_mcp_tools.py new file mode 100644 index 000000000000..d24c557b79c0 --- /dev/null +++ b/tests/entrypoints/openai/test_response_api_mcp_tools.py @@ -0,0 +1,107 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import pytest +import pytest_asyncio +from openai import OpenAI + +from ...utils import RemoteOpenAIServer + +MODEL_NAME = "openai/gpt-oss-20b" + + +@pytest.fixture(scope="module") +def monkeypatch_module(): + from _pytest.monkeypatch import MonkeyPatch + mpatch = MonkeyPatch() + yield mpatch + mpatch.undo() + + +@pytest.fixture(scope="module") +def mcp_disabled_server(monkeypatch_module: pytest.MonkeyPatch): + args = ["--enforce-eager", 
"--tool-server", "demo"] + + with monkeypatch_module.context() as m: + m.setenv("VLLM_ENABLE_RESPONSES_API_STORE", "1") + m.setenv("PYTHON_EXECUTION_BACKEND", "dangerously_use_uv") + with RemoteOpenAIServer(MODEL_NAME, args) as remote_server: + yield remote_server + + +@pytest.fixture(scope="function") +def mcp_enabled_server(monkeypatch_module: pytest.MonkeyPatch): + args = ["--enforce-eager", "--tool-server", "demo"] + + with monkeypatch_module.context() as m: + m.setenv("VLLM_ENABLE_RESPONSES_API_STORE", "1") + m.setenv("PYTHON_EXECUTION_BACKEND", "dangerously_use_uv") + m.setenv("GPT_OSS_SYSTEM_TOOL_MCP_LABELS", + "code_interpreter,container") + with RemoteOpenAIServer(MODEL_NAME, args) as remote_server: + yield remote_server + + +@pytest_asyncio.fixture +async def mcp_disabled_client(mcp_disabled_server): + async with mcp_disabled_server.get_async_client() as async_client: + yield async_client + + +@pytest_asyncio.fixture +async def mcp_enabled_client(mcp_enabled_server): + async with mcp_enabled_server.get_async_client() as async_client: + yield async_client + + +@pytest.mark.asyncio +@pytest.mark.parametrize("model_name", [MODEL_NAME]) +@pytest.mark.skip(reason="Code interpreter tool is not available in CI yet.") +async def test_mcp_tool_env_flag_enabled(mcp_enabled_client: OpenAI, + model_name: str): + response = await mcp_enabled_client.responses.create( + model=model_name, + # TODO: Ideally should be able to set max tool calls + # to prevent multi-turn, but it is not currently supported + # would speed up the test + input=("What's the first 4 digits after the decimal point of " + "cube root of `19910212 * 20250910`? " + "Show only the digits. The python interpreter is not stateful " + "and you must print to see the output."), + tools=[{ + "type": "mcp", + "server_label": "code_interpreter", + # URL unused for DemoToolServer + "server_url": "http://localhost:8888" + }], + ) + assert response is not None + assert response.status == "completed" + assert response.usage.output_tokens_details.tool_output_tokens > 0 + + +@pytest.mark.asyncio +@pytest.mark.parametrize("model_name", [MODEL_NAME]) +@pytest.mark.skip(reason="Code interpreter tool is not available in CI yet.") +async def test_mcp_tool_env_flag_disabled( + mcp_disabled_client: OpenAI, model_name: str, + monkeypatch_module: pytest.MonkeyPatch): + response = await mcp_disabled_client.responses.create( + model=model_name, + # TODO: Ideally should be able to set max tool calls + # to prevent multi-turn, but it is not currently supported + # would speed up the test + input=("What's the first 4 digits after the decimal point of " + "cube root of `19910212 * 20250910`? " + "Show only the digits. 
The python interpreter is not stateful " + "and you must print to see the output."), + tools=[{ + "type": "mcp", + "server_label": "code_interpreter", + # URL unused for DemoToolServer + "server_url": "http://localhost:8888" + }], + ) + assert response is not None + assert response.status == "completed" + assert response.usage.output_tokens_details.tool_output_tokens == 0 diff --git a/tests/entrypoints/openai/test_response_api_with_harmony.py b/tests/entrypoints/openai/test_response_api_with_harmony.py index b2e6cd8ae697..b219b03d0795 100644 --- a/tests/entrypoints/openai/test_response_api_with_harmony.py +++ b/tests/entrypoints/openai/test_response_api_with_harmony.py @@ -417,31 +417,6 @@ async def test_code_interpreter(client: OpenAI, model_name: str): assert response.usage.output_tokens_details.tool_output_tokens > 0 -@pytest.mark.asyncio -@pytest.mark.parametrize("model_name", [MODEL_NAME]) -@pytest.mark.skip(reason="Code interpreter tool is not available in CI yet.") -async def test_mcp_tool(client: OpenAI, model_name: str): - response = await client.responses.create( - model=model_name, - # TODO: Ideally should be able to set max tool calls - # to prevent multi-turn, but it is not currently supported - # would speed up the test - input=("What's the first 4 digits after the decimal point of " - "cube root of `19910212 * 20250910`? " - "Show only the digits. The python interpreter is not stateful " - "and you must print to see the output."), - tools=[{ - "type": "mcp", - "server_label": "code_interpreter", - # URL unused for DemoToolServer - "server_url": "http://localhost:8888" - }], - ) - assert response is not None - assert response.status == "completed" - assert response.usage.output_tokens_details.tool_output_tokens > 0 - - def get_weather(latitude, longitude): response = requests.get( f"https://api.open-meteo.com/v1/forecast?latitude={latitude}&longitude={longitude}¤t=temperature_2m,wind_speed_10m&hourly=temperature_2m,relative_humidity_2m,wind_speed_10m" # noqa diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py index d8c7ebded97d..7a700d0bbee2 100644 --- a/vllm/entrypoints/openai/serving_responses.py +++ b/vllm/entrypoints/openai/serving_responses.py @@ -732,15 +732,16 @@ def _construct_input_messages_with_harmony( # New conversation. reasoning_effort = (request.reasoning.effort if request.reasoning else None) - # If there is a MCP tool with a matching server_label, then - # the tool is enabled. 
Native types like code_interpreter work - # as well - tool_types = [ - tool.server_label if tool.type == "mcp" else tool.type - for tool in request.tools - ] - if envs.VLLM_GPT_OSS_USE_CONTAINER_TOOL: - tool_types.append("container") + tool_types = [tool.type for tool in request.tools] + + # Allow the MCP Tool type to enable built in tools if the + # server_label is allowlisted in + # envs.GPT_OSS_SYSTEM_TOOL_MCP_LABELS + if envs.GPT_OSS_SYSTEM_TOOL_MCP_LABELS: + for tool in request.tools: + if (tool.type == "mcp" and tool.server_label + in envs.GPT_OSS_SYSTEM_TOOL_MCP_LABELS): + tool_types.append(tool.server_label) enable_browser = ("web_search_preview" in tool_types and self.tool_server is not None and self.tool_server.has_tool("browser")) diff --git a/vllm/envs.py b/vllm/envs.py index d2006979ea81..cbf99d853546 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -174,11 +174,11 @@ VLLM_ALLREDUCE_USE_SYMM_MEM: bool = False VLLM_TUNED_CONFIG_FOLDER: Optional[str] = None VLLM_DISABLE_PAD_FOR_CUDAGRAPH: bool = False - VLLM_GPT_OSS_USE_CONTAINER_TOOL: bool = False VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS: bool = False VLLM_CUSTOM_SCOPES_FOR_PROFILING: bool = False VLLM_KV_EVENTS_USE_INT_BLOCK_HASHES: bool = True VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME: str = "VLLM_OBJECT_STORAGE_SHM_BUFFER" + GPT_OSS_SYSTEM_TOOL_MCP_LABELS: list[str] = [] def get_default_cache_root(): @@ -1246,10 +1246,6 @@ def get_vllm_port() -> Optional[int]: "VLLM_TUNED_CONFIG_FOLDER": lambda: os.getenv("VLLM_TUNED_CONFIG_FOLDER", None), - # Allows vllm use container tool - "VLLM_GPT_OSS_USE_CONTAINER_TOOL": - lambda: bool(int(os.getenv("VLLM_GPT_OSS_USE_CONTAINER_TOOL", "0"))), - # Allows harmony instructions to be injected on system messages "VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS": lambda: bool( @@ -1269,6 +1265,10 @@ def get_vllm_port() -> Optional[int]: "VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME": lambda: os.getenv("VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME", "VLLM_OBJECT_STORAGE_SHM_BUFFER"), + + "GPT_OSS_SYSTEM_TOOL_MCP_LABELS": + lambda: ([] if "GPT_OSS_SYSTEM_TOOL_MCP_LABELS" not in os.environ + else os.environ["GPT_OSS_SYSTEM_TOOL_MCP_LABELS"].split(",")), } # --8<-- [end:env-vars-definition] From 0bd5e5359acee9e1d49a84c324be42dd69553a47 Mon Sep 17 00:00:00 2001 From: Alec Solder Date: Fri, 19 Sep 2025 15:38:52 -0700 Subject: [PATCH 7/8] Fixing nits and adding env variable validation Signed-off-by: Alec Solder --- .../openai/test_response_api_mcp_tools.py | 5 +- vllm/entrypoints/context.py | 21 ++++--- vllm/envs.py | 60 ++++++++++++++++++- 3 files changed, 72 insertions(+), 14 deletions(-) diff --git a/tests/entrypoints/openai/test_response_api_mcp_tools.py b/tests/entrypoints/openai/test_response_api_mcp_tools.py index d24c557b79c0..b0eb84712c19 100644 --- a/tests/entrypoints/openai/test_response_api_mcp_tools.py +++ b/tests/entrypoints/openai/test_response_api_mcp_tools.py @@ -83,9 +83,8 @@ async def test_mcp_tool_env_flag_enabled(mcp_enabled_client: OpenAI, @pytest.mark.asyncio @pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.skip(reason="Code interpreter tool is not available in CI yet.") -async def test_mcp_tool_env_flag_disabled( - mcp_disabled_client: OpenAI, model_name: str, - monkeypatch_module: pytest.MonkeyPatch): +async def test_mcp_tool_env_flag_disabled(mcp_disabled_client: OpenAI, + model_name: str): response = await mcp_disabled_client.responses.create( model=model_name, # TODO: Ideally should be able to set max tool calls diff --git a/vllm/entrypoints/context.py b/vllm/entrypoints/context.py 
index 793d8f63083c..ea81fdbcd825 100644 --- a/vllm/entrypoints/context.py +++ b/vllm/entrypoints/context.py @@ -22,20 +22,23 @@ logger = logging.getLogger(__name__) - # This is currently needed as the tool type doesn't 1:1 match the # tool namespace, which is what is used to look up the # connection to the tool server +_TOOL_NAME_TO_TYPE_MAP = { + "browser": "web_search_preview", + "python": "code_interpreter", + "container": "container", +} + + def _map_tool_name_to_tool_type(tool_name: str) -> str: - if tool_name == "browser": - return "web_search_preview" - elif tool_name == "python": - return "code_interpreter" - elif tool_name == "container": - return "container" - else: + if tool_name not in _TOOL_NAME_TO_TYPE_MAP: + available_tools = ', '.join(_TOOL_NAME_TO_TYPE_MAP.keys()) raise ValueError( - f"Built in tool name not defined in mapping: {tool_name}") + f"Built-in tool name '{tool_name}' not defined in mapping. " + f"Available tools: {available_tools}") + return _TOOL_NAME_TO_TYPE_MAP[tool_name] class TurnTokens: diff --git a/vllm/envs.py b/vllm/envs.py index c5e29419ecc8..c71597054287 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -257,6 +257,58 @@ def _get_validated_env() -> Optional[str]: return _get_validated_env +def env_list_with_choices( + env_name: str, + default: list[str], + choices: Union[list[str], Callable[[], list[str]]], + case_sensitive: bool = True) -> Callable[[], list[str]]: + """ + Create a lambda that validates environment variable + containing comma-separated values against allowed choices + + Args: + env_name: Name of the environment variable + default: Default list of values if not set + choices: List of valid string options or callable that returns list + case_sensitive: Whether validation should be case sensitive + + Returns: + Lambda function for environment_variables + dict that returns list of strings + """ + + def _get_validated_env_list() -> list[str]: + value = os.getenv(env_name) + if value is None: + return default + + # Split comma-separated values and strip whitespace + values = [v.strip() for v in value.split(",") if v.strip()] + + if not values: + return default + + # Resolve choices if it's a callable (for lazy loading) + actual_choices = choices() if callable(choices) else choices + + # Validate each value + for val in values: + if not case_sensitive: + check_value = val.lower() + check_choices = [choice.lower() for choice in actual_choices] + else: + check_value = val + check_choices = actual_choices + + if check_value not in check_choices: + raise ValueError(f"Invalid value '{val}' in {env_name}. " + f"Valid options: {actual_choices}.") + + return values + + return _get_validated_env_list + + def get_vllm_port() -> Optional[int]: """Get the port from VLLM_PORT environment variable. 
@@ -1329,9 +1381,13 @@ def get_vllm_port() -> Optional[int]: lambda: os.getenv("VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME", "VLLM_OBJECT_STORAGE_SHM_BUFFER"), + # Valid values are container,code_interpreter,web_search_preview + # ex GPT_OSS_SYSTEM_TOOL_MCP_LABELS=container,code_interpreter "GPT_OSS_SYSTEM_TOOL_MCP_LABELS": - lambda: ([] if "GPT_OSS_SYSTEM_TOOL_MCP_LABELS" not in os.environ - else os.environ["GPT_OSS_SYSTEM_TOOL_MCP_LABELS"].split(",")), + env_list_with_choices("GPT_OSS_SYSTEM_TOOL_MCP_LABELS", [], + ["container", + "code_interpreter", + "web_search_preview"]), } # --8<-- [end:env-vars-definition] From 823414a511f35e85d9417e728cbff95f371d8b89 Mon Sep 17 00:00:00 2001 From: Alec Solder Date: Mon, 22 Sep 2025 13:03:40 -0700 Subject: [PATCH 8/8] Adding tests for env variable parsing Signed-off-by: Alec Solder --- tests/test_envs.py | 216 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 216 insertions(+) create mode 100644 tests/test_envs.py diff --git a/tests/test_envs.py b/tests/test_envs.py new file mode 100644 index 000000000000..f81a6e2e415c --- /dev/null +++ b/tests/test_envs.py @@ -0,0 +1,216 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import os +from unittest.mock import patch + +import pytest + +from vllm.envs import env_list_with_choices, env_with_choices + + +class TestEnvWithChoices: + """Test cases for env_with_choices function.""" + + def test_default_value_returned_when_env_not_set(self): + """Test default is returned when env var is not set.""" + env_func = env_with_choices("NONEXISTENT_ENV", "default", + ["option1", "option2"]) + assert env_func() == "default" + + def test_none_default_returned_when_env_not_set(self): + """Test that None is returned when env not set and default is None.""" + env_func = env_with_choices("NONEXISTENT_ENV", None, + ["option1", "option2"]) + assert env_func() is None + + def test_valid_value_returned_case_sensitive(self): + """Test that valid value is returned in case sensitive mode.""" + with patch.dict(os.environ, {"TEST_ENV": "option1"}): + env_func = env_with_choices("TEST_ENV", + "default", ["option1", "option2"], + case_sensitive=True) + assert env_func() == "option1" + + def test_valid_lowercase_value_returned_case_insensitive(self): + """Test that lowercase value is accepted in case insensitive mode.""" + with patch.dict(os.environ, {"TEST_ENV": "option1"}): + env_func = env_with_choices("TEST_ENV", + "default", ["OPTION1", "OPTION2"], + case_sensitive=False) + assert env_func() == "option1" + + def test_valid_uppercase_value_returned_case_insensitive(self): + """Test that uppercase value is accepted in case insensitive mode.""" + with patch.dict(os.environ, {"TEST_ENV": "OPTION1"}): + env_func = env_with_choices("TEST_ENV", + "default", ["option1", "option2"], + case_sensitive=False) + assert env_func() == "OPTION1" + + def test_invalid_value_raises_error_case_sensitive(self): + """Test that invalid value raises ValueError in case sensitive mode.""" + with patch.dict(os.environ, {"TEST_ENV": "invalid"}): + env_func = env_with_choices("TEST_ENV", + "default", ["option1", "option2"], + case_sensitive=True) + with pytest.raises(ValueError, + match="Invalid value 'invalid' for TEST_ENV"): + env_func() + + def test_case_mismatch_raises_error_case_sensitive(self): + """Test that case mismatch raises ValueError in case sensitive mode.""" + with patch.dict(os.environ, {"TEST_ENV": "OPTION1"}): + env_func = env_with_choices("TEST_ENV", + "default", 
["option1", "option2"], + case_sensitive=True) + with pytest.raises(ValueError, + match="Invalid value 'OPTION1' for TEST_ENV"): + env_func() + + def test_invalid_value_raises_error_case_insensitive(self): + """Test that invalid value raises ValueError when case insensitive.""" + with patch.dict(os.environ, {"TEST_ENV": "invalid"}): + env_func = env_with_choices("TEST_ENV", + "default", ["option1", "option2"], + case_sensitive=False) + with pytest.raises(ValueError, + match="Invalid value 'invalid' for TEST_ENV"): + env_func() + + def test_callable_choices_resolved_correctly(self): + """Test that callable choices are resolved correctly.""" + + def get_choices(): + return ["dynamic1", "dynamic2"] + + with patch.dict(os.environ, {"TEST_ENV": "dynamic1"}): + env_func = env_with_choices("TEST_ENV", "default", get_choices) + assert env_func() == "dynamic1" + + def test_callable_choices_with_invalid_value(self): + """Test that callable choices raise error for invalid values.""" + + def get_choices(): + return ["dynamic1", "dynamic2"] + + with patch.dict(os.environ, {"TEST_ENV": "invalid"}): + env_func = env_with_choices("TEST_ENV", "default", get_choices) + with pytest.raises(ValueError, + match="Invalid value 'invalid' for TEST_ENV"): + env_func() + + +class TestEnvListWithChoices: + """Test cases for env_list_with_choices function.""" + + def test_default_list_returned_when_env_not_set(self): + """Test that default list is returned when env var is not set.""" + env_func = env_list_with_choices("NONEXISTENT_ENV", + ["default1", "default2"], + ["option1", "option2"]) + assert env_func() == ["default1", "default2"] + + def test_empty_default_list_returned_when_env_not_set(self): + """Test that empty default list is returned when env not set.""" + env_func = env_list_with_choices("NONEXISTENT_ENV", [], + ["option1", "option2"]) + assert env_func() == [] + + def test_single_valid_value_parsed_correctly(self): + """Test that single valid value is parsed correctly.""" + with patch.dict(os.environ, {"TEST_ENV": "option1"}): + env_func = env_list_with_choices("TEST_ENV", [], + ["option1", "option2"]) + assert env_func() == ["option1"] + + def test_multiple_valid_values_parsed_correctly(self): + """Test that multiple valid values are parsed correctly.""" + with patch.dict(os.environ, {"TEST_ENV": "option1,option2"}): + env_func = env_list_with_choices("TEST_ENV", [], + ["option1", "option2"]) + assert env_func() == ["option1", "option2"] + + def test_values_with_whitespace_trimmed(self): + """Test that values with whitespace are trimmed correctly.""" + with patch.dict(os.environ, {"TEST_ENV": " option1 , option2 "}): + env_func = env_list_with_choices("TEST_ENV", [], + ["option1", "option2"]) + assert env_func() == ["option1", "option2"] + + def test_empty_values_filtered_out(self): + """Test that empty values are filtered out.""" + with patch.dict(os.environ, {"TEST_ENV": "option1,,option2,"}): + env_func = env_list_with_choices("TEST_ENV", [], + ["option1", "option2"]) + assert env_func() == ["option1", "option2"] + + def test_empty_string_returns_default(self): + """Test that empty string returns default.""" + with patch.dict(os.environ, {"TEST_ENV": ""}): + env_func = env_list_with_choices("TEST_ENV", ["default"], + ["option1", "option2"]) + assert env_func() == ["default"] + + def test_only_commas_returns_default(self): + """Test that string with only commas returns default.""" + with patch.dict(os.environ, {"TEST_ENV": ",,,"}): + env_func = env_list_with_choices("TEST_ENV", ["default"], + 
["option1", "option2"]) + assert env_func() == ["default"] + + def test_case_sensitive_validation(self): + """Test case sensitive validation.""" + with patch.dict(os.environ, {"TEST_ENV": "option1,OPTION2"}): + env_func = env_list_with_choices("TEST_ENV", [], + ["option1", "option2"], + case_sensitive=True) + with pytest.raises(ValueError, + match="Invalid value 'OPTION2' in TEST_ENV"): + env_func() + + def test_case_insensitive_validation(self): + """Test case insensitive validation.""" + with patch.dict(os.environ, {"TEST_ENV": "OPTION1,option2"}): + env_func = env_list_with_choices("TEST_ENV", [], + ["option1", "option2"], + case_sensitive=False) + assert env_func() == ["OPTION1", "option2"] + + def test_invalid_value_in_list_raises_error(self): + """Test that invalid value in list raises ValueError.""" + with patch.dict(os.environ, {"TEST_ENV": "option1,invalid,option2"}): + env_func = env_list_with_choices("TEST_ENV", [], + ["option1", "option2"]) + with pytest.raises(ValueError, + match="Invalid value 'invalid' in TEST_ENV"): + env_func() + + def test_callable_choices_resolved_correctly(self): + """Test that callable choices are resolved correctly.""" + + def get_choices(): + return ["dynamic1", "dynamic2"] + + with patch.dict(os.environ, {"TEST_ENV": "dynamic1,dynamic2"}): + env_func = env_list_with_choices("TEST_ENV", [], get_choices) + assert env_func() == ["dynamic1", "dynamic2"] + + def test_callable_choices_with_invalid_value(self): + """Test that callable choices raise error for invalid values.""" + + def get_choices(): + return ["dynamic1", "dynamic2"] + + with patch.dict(os.environ, {"TEST_ENV": "dynamic1,invalid"}): + env_func = env_list_with_choices("TEST_ENV", [], get_choices) + with pytest.raises(ValueError, + match="Invalid value 'invalid' in TEST_ENV"): + env_func() + + def test_duplicate_values_preserved(self): + """Test that duplicate values in the list are preserved.""" + with patch.dict(os.environ, {"TEST_ENV": "option1,option1,option2"}): + env_func = env_list_with_choices("TEST_ENV", [], + ["option1", "option2"]) + assert env_func() == ["option1", "option1", "option2"]