Skip to content

Commit 30d88d9

Browse files
authored
chore(mcp): add server info metadata on client session "root" spans (#15306)
#14828) ## Description Adds server info metadata to LLM Observability MCP client session "root" spans. This information includes: - server name - server version - server title Additionally, adds similar tags to client tool spans: - For both client and server tool calls, record which kind of tool call it is in a tag (apart from the span name, which can vary under manual instrumentation, nothing else distinguishes the two, since they are otherwise ordinary tool calls) - Add the MCP server name of the session in which the client tool call was made Lastly, for the `mcp` integration in general, client tool calls are now marked as errors when the tool result returned from the MCP server is itself an error. MLOB-4147 ## Testing Added unit tests and additionally verified the change through internal testing against a staging hash in our frontend. ## Risks None ## Description <!-- Provide an overview of the change and motivation for the change --> ## Testing <!-- Describe your testing strategy or note what tests are included --> ## Risks <!-- Note any risks associated with this change, or "None" if no risks --> ## Additional Notes <!-- Any other information that would be helpful for reviewers -->
1 parent a234511 commit 30d88d9

File tree

5 files changed

+149
-45
lines changed

5 files changed

+149
-45
lines changed

ddtrace/contrib/internal/mcp/patch.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from ddtrace import config
1010
from ddtrace._trace.pin import Pin
1111
from ddtrace._trace.span import Span
12+
from ddtrace.constants import ERROR_MSG
1213
from ddtrace.contrib.internal.trace_utils import activate_distributed_headers
1314
from ddtrace.contrib.trace_utils import unwrap
1415
from ddtrace.contrib.trace_utils import with_traced_module
@@ -109,15 +110,25 @@ def traced_send_request(mcp, pin: Pin, func, instance, args: tuple, kwargs: dict
109110

110111
@with_traced_module
111112
async def traced_call_tool(mcp, pin: Pin, func, instance, args: tuple, kwargs: dict):
112-
integration = mcp._datadog_integration
113+
integration: MCPIntegration = mcp._datadog_integration
113114

114-
span = integration.trace(pin, CLIENT_TOOL_CALL_OPERATION_NAME, submit_to_llmobs=True)
115+
span: Span = integration.trace(pin, CLIENT_TOOL_CALL_OPERATION_NAME, submit_to_llmobs=True)
115116

116117
try:
117118
result = await func(*args, **kwargs)
119+
120+
if getattr(result, "isError", False):
121+
content = getattr(result, "content", [])
122+
span.error = 1
123+
124+
content_block = content[0] if content and isinstance(content, list) else None
125+
if content_block and getattr(content_block, "text", None):
126+
span.set_tag(ERROR_MSG, content_block.text)
127+
118128
integration.llmobs_set_tags(
119129
span, args=args, kwargs=kwargs, response=result, operation=CLIENT_TOOL_CALL_OPERATION_NAME
120130
)
131+
121132
return result
122133
except Exception:
123134
integration.llmobs_set_tags(
@@ -182,7 +193,7 @@ async def traced_client_session_list_tools(mcp, pin: Pin, func, instance, args:
182193
@with_traced_module
183194
async def traced_client_session_aenter(mcp, pin: Pin, func, instance, args: tuple, kwargs: dict):
184195
integration: MCPIntegration = mcp._datadog_integration
185-
span = integration.trace(pin, instance.__class__.__name__, submit_to_llmobs=True)
196+
span = integration.trace(pin, instance.__class__.__name__, submit_to_llmobs=True, type="client_session")
186197

187198
setattr(instance, "_dd_span", span)
188199
try:

ddtrace/llmobs/_integrations/mcp.py

Lines changed: 72 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,14 @@
33
from typing import List
44
from typing import Optional
55

6+
from ddtrace._trace.pin import Pin
67
from ddtrace.internal.logger import get_logger
78
from ddtrace.internal.utils import get_argument_value
89
from ddtrace.llmobs._constants import INPUT_VALUE
910
from ddtrace.llmobs._constants import NAME
1011
from ddtrace.llmobs._constants import OUTPUT_VALUE
1112
from ddtrace.llmobs._constants import SPAN_KIND
13+
from ddtrace.llmobs._constants import TAGS
1214
from ddtrace.llmobs._integrations.base import BaseLLMIntegration
1315
from ddtrace.llmobs._utils import _get_attr
1416
from ddtrace.llmobs._utils import safe_json
@@ -17,13 +19,46 @@
1719

1820
log = get_logger(__name__)
1921

22+
MCP_SPAN_TYPE = "_ml_obs.mcp_span_type"
23+
2024
SERVER_TOOL_CALL_OPERATION_NAME = "server_tool_call"
2125
CLIENT_TOOL_CALL_OPERATION_NAME = "client_tool_call"
2226

2327

28+
def _find_client_session_root(span: Optional[Span]) -> Optional[Span]:
    """
    Return the nearest client-session span at or above ``span``.

    Starting with ``span`` itself, follow the ``_parent`` links until a span
    whose ``MCP_SPAN_TYPE`` context item equals ``"client_session"`` is found.
    Returns ``None`` when ``span`` is ``None`` or when the chain ends without
    a match.

    Only in-process parent links are followed, so a session root that lives
    in another process (distributed tracing) will not be found. Client
    operations are expected to run in the same service or process, so this
    should mostly be safe.
    """
    candidate = span
    # Stop either at the first client-session span or when the chain runs out.
    while candidate is not None and candidate._get_ctx_item(MCP_SPAN_TYPE) != "client_session":
        candidate = candidate._parent
    return candidate
40+
41+
42+
def _set_or_update_tags(span: Span, tags: Dict[str, str]) -> None:
    """
    Merge ``tags`` into the tag dict stored on ``span`` under the ``TAGS`` context item.

    When the span already carries a tag dict, it is updated in place, so
    entries in ``tags`` overwrite existing entries with the same key. When it
    does not, a new dict holding the entries of ``tags`` is stored.

    :param span: span whose ``TAGS`` context item is created or updated.
    :param tags: tag names and values to merge; never mutated by this function.
    """
    existing_tags: Optional[Dict[str, str]] = span._get_ctx_item(TAGS)
    if existing_tags is not None:
        existing_tags.update(tags)
        return

    # Store a copy rather than the caller's dict: later merges update the
    # stored dict in place and would otherwise silently mutate the argument
    # that the first caller passed in.
    span._set_ctx_item(TAGS, dict(tags))
48+
49+
2450
class MCPIntegration(BaseLLMIntegration):
2551
_integration_name = "mcp"
2652

53+
def trace(self, pin: Pin, operation_id: str, submit_to_llmobs: bool = False, **kwargs) -> Span:
    """
    Start a span via the base integration and record its MCP span type.

    All arguments, including ``**kwargs``, are forwarded unchanged to the
    parent class's ``trace``. If a truthy ``type`` keyword argument was
    supplied, its value is also stored on the returned span under the
    ``MCP_SPAN_TYPE`` context item.
    """
    span = super().trace(pin, operation_id, submit_to_llmobs, **kwargs)

    requested_type = kwargs.get("type")
    if not requested_type:
        # No MCP span type requested: leave the span untagged.
        return span

    span._set_ctx_item(MCP_SPAN_TYPE, requested_type)
    return span
61+
2762
def _parse_mcp_text_content(self, item: Any) -> Dict[str, Any]:
2863
"""Parse MCP TextContent fields, extracting only non-None values."""
2964
content_block = {
@@ -69,7 +104,19 @@ def _llmobs_set_tags_client(self, span: Span, args: List[Any], kwargs: Dict[str,
69104
}
70105
)
71106

72-
if span.error or response is None:
107+
client_session_root = _find_client_session_root(span)
108+
if client_session_root:
109+
client_session_root_tags = client_session_root._get_ctx_item(TAGS) or {}
110+
_set_or_update_tags(
111+
span,
112+
{
113+
"mcp_server_name": client_session_root_tags.get("mcp_server_name", ""),
114+
},
115+
)
116+
117+
_set_or_update_tags(span, {"mcp_tool_kind": "client"})
118+
119+
if response is None:
73120
return
74121

75122
# Tool response is `mcp.types.CallToolResult` type
@@ -94,6 +141,8 @@ def _llmobs_set_tags_server(self, span: Span, args: List[Any], kwargs: Dict[str,
94141
}
95142
)
96143

144+
_set_or_update_tags(span, {"mcp_tool_kind": "server"})
145+
97146
if span.error or response is None:
98147
return
99148

@@ -107,7 +156,28 @@ def _llmobs_set_tags_server(self, span: Span, args: List[Any], kwargs: Dict[str,
107156
span._set_ctx_item(OUTPUT_VALUE, output_value)
108157

109158
def _llmobs_set_tags_initialize(self, span: Span, args: List[Any], kwargs: Dict[str, Any], response: Any) -> None:
110-
span._set_ctx_items({NAME: "MCP Client Initialize", SPAN_KIND: "task", OUTPUT_VALUE: safe_json(response)})
159+
span._set_ctx_items(
160+
{
161+
NAME: "MCP Client Initialize",
162+
SPAN_KIND: "task",
163+
OUTPUT_VALUE: safe_json(response),
164+
}
165+
)
166+
167+
server_info = getattr(response, "serverInfo", None)
168+
if not server_info:
169+
return
170+
171+
client_session_root = _find_client_session_root(span)
172+
if client_session_root:
173+
_set_or_update_tags(
174+
client_session_root,
175+
{
176+
"mcp_server_name": getattr(server_info, "name", ""),
177+
"mcp_server_version": getattr(server_info, "version", ""),
178+
"mcp_server_title": getattr(server_info, "title", ""),
179+
},
180+
)
111181

112182
def _llmobs_set_tags_list_tools(self, span: Span, args: List[Any], kwargs: Dict[str, Any], response: Any) -> None:
113183
cursor = get_argument_value(args, kwargs, 0, "cursor", optional=True)
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
features:
3+
- |
4+
mcp: Marks client MCP tool call spans as errors when the corresponding server tool call errored.
5+
- |
6+
LLM Observability: Adds additional tags to MCP client session and tool call spans to power LLM Observability MCP tool call features.

tests/contrib/mcp/test_mcp_llmobs.py

Lines changed: 39 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import asyncio
2+
import importlib
23
import json
34
import os
45
from textwrap import dedent
@@ -54,19 +55,34 @@ def test_llmobs_mcp_client_calls_server(mcp_setup, mock_tracer, llmobs_events, m
5455
"isError": False,
5556
}
5657
),
57-
tags={"service": "mcptest", "ml_app": "<ml-app-name>"},
58+
tags={
59+
"service": "mcptest",
60+
"ml_app": "<ml-app-name>",
61+
"mcp_server_name": "TestServer",
62+
"mcp_tool_kind": "client",
63+
},
5864
)
5965
assert server_events[0] == _expected_llmobs_non_llm_span_event(
6066
server_span,
6167
span_kind="tool",
6268
input_value=json.dumps({"operation": "add", "a": 20, "b": 22}),
6369
output_value=json.dumps([{"type": "text", "annotations": {}, "meta": {}, "text": '{\n "result": 42\n}'}]),
64-
tags={"service": "mcptest", "ml_app": "<ml-app-name>"},
70+
tags={"service": "mcptest", "ml_app": "<ml-app-name>", "mcp_tool_kind": "server"},
6571
)
6672

6773
# asserting the remaining spans
6874
assert llmobs_events[0] == _expected_llmobs_non_llm_span_event(
69-
all_spans[0], span_kind="workflow", input_value=mock.ANY, tags={"service": "mcptest", "ml_app": "<ml-app-name>"}
75+
all_spans[0],
76+
span_kind="workflow",
77+
input_value=mock.ANY,
78+
tags={
79+
"service": "mcptest",
80+
"ml_app": "<ml-app-name>",
81+
"mcp_server_name": "TestServer",
82+
"mcp_server_version": importlib.metadata.version("mcp"),
83+
"mcp_server_title": None,
84+
},
85+
metadata=mock.ANY,
7086
)
7187

7288
assert llmobs_events[1] == _expected_llmobs_non_llm_span_event(
@@ -97,33 +113,33 @@ def test_llmobs_client_server_tool_error(mcp_setup, mock_tracer, llmobs_events,
97113
assert client_events[0]["name"] == "MCP Client Tool Call: failing_tool"
98114
assert server_events[0]["name"] == "MCP Server Tool Execute: failing_tool"
99115

100-
assert not client_span.error
116+
assert client_span.error
101117
assert server_span.error
102118

103-
assert client_events[0] == _expected_llmobs_non_llm_span_event(
104-
client_span,
105-
span_kind="tool",
106-
input_value=json.dumps({"param": "value"}),
107-
output_value=json.dumps(
108-
{
109-
"content": [
110-
{
111-
"type": "text",
112-
"annotations": {},
113-
"meta": {},
114-
"text": "Error executing tool failing_tool: Tool execution failed",
115-
}
116-
],
117-
"isError": True,
118-
}
119-
),
120-
tags={"service": "mcptest", "ml_app": "<ml-app-name>"},
119+
# assert the error client span manually
120+
assert client_events[0]["meta"]["input"]["value"] == json.dumps({"param": "value"})
121+
assert client_events[0]["meta"]["output"]["value"] == json.dumps(
122+
{
123+
"content": [
124+
{
125+
"type": "text",
126+
"annotations": {},
127+
"meta": {},
128+
"text": "Error executing tool failing_tool: Tool execution failed",
129+
}
130+
],
131+
"isError": True,
132+
}
121133
)
134+
assert client_events[0]["meta"]["error"]["message"] == "Error executing tool failing_tool: Tool execution failed"
135+
assert client_events[0]["status"] == "error"
136+
assert "error:1" in client_events[0]["tags"]
137+
122138
assert server_events[0] == _expected_llmobs_non_llm_span_event(
123139
server_span,
124140
span_kind="tool",
125141
input_value=json.dumps({"param": "value"}),
126-
tags={"service": "mcptest", "ml_app": "<ml-app-name>"},
142+
tags={"service": "mcptest", "ml_app": "<ml-app-name>", "mcp_tool_kind": "server"},
127143
error="mcp.server.fastmcp.exceptions.ToolError",
128144
error_message="Error executing tool failing_tool: Tool execution failed",
129145
error_stack=mock.ANY,

tests/snapshots/tests.contrib.mcp.test_mcp.test_mcp_tool_error.json

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -10,19 +10,19 @@
1010
"error": 0,
1111
"meta": {
1212
"_dd.p.dm": "-0",
13-
"_dd.p.tid": "68de79fc00000000",
13+
"_dd.p.tid": "68ff754800000000",
1414
"language": "python",
15-
"runtime-id": "65f2de5edad8471a88b2032aca2523ab"
15+
"runtime-id": "e2c73207083c4f57b080a3d0d670f79d"
1616
},
1717
"metrics": {
1818
"_dd.measured": 1,
1919
"_dd.top_level": 1,
2020
"_dd.tracer_kr": 1.0,
2121
"_sampling_priority_v1": 1,
22-
"process_id": 43046
22+
"process_id": 61410
2323
},
24-
"duration": 3761000,
25-
"start": 1759410684715091000
24+
"duration": 5585000,
25+
"start": 1761572168208741000
2626
},
2727
{
2828
"name": "mcp.request",
@@ -36,8 +36,8 @@
3636
"metrics": {
3737
"_dd.measured": 1
3838
},
39-
"duration": 992000,
40-
"start": 1759410684715187000
39+
"duration": 2029000,
40+
"start": 1761572168209011000
4141
},
4242
{
4343
"name": "mcp.request",
@@ -47,15 +47,16 @@
4747
"span_id": 3,
4848
"parent_id": 1,
4949
"type": "",
50-
"error": 0,
50+
"error": 1,
5151
"meta": {
52-
"_dd.p.tid": "68de79fc00000000"
52+
"_dd.p.tid": "68ff754800000000",
53+
"error.message": "Error executing tool failing_tool: Tool execution failed"
5354
},
5455
"metrics": {
5556
"_dd.measured": 1
5657
},
57-
"duration": 2386000,
58-
"start": 1759410684716255000
58+
"duration": 2875000,
59+
"start": 1761572168211125000
5960
},
6061
{
6162
"name": "mcp.request",
@@ -67,17 +68,17 @@
6768
"type": "",
6869
"error": 1,
6970
"meta": {
70-
"_dd.p.tid": "68de79fc00000000",
71+
"_dd.p.tid": "68ff754800000000",
7172
"error.message": "Error executing tool failing_tool: Tool execution failed",
72-
"error.stack": "Traceback (most recent call last):\n File \"/Users/sam.brenner/dd/dd-trace-py/.riot/venv_py31013_mock_pytest_pytest-mock_coverage_pytest-cov_opentracing_hypothesis6451_pytest-asyncio_mcp~1100/lib/python3.10/site-packages/mcp/server/fastmcp/tools/base.py\", line 98, in run\n result = await self.fn_metadata.call_fn_with_arg_validation(\n File \"/Users/sam.brenner/dd/dd-trace-py/.riot/venv_py31013_mock_pytest_pytest-mock_coverage_pytest-cov_opentracing_hypothesis6451_pytest-asyncio_mcp~1100/lib/python3.10/site-packages/mcp/server/fastmcp/utilities/func_metadata.py\", line 86, in call_fn_with_arg_validation\n return fn(**arguments_parsed_dict)\n File \"/Users/sam.brenner/dd/dd-trace-py/tests/contrib/mcp/conftest.py\", line 101, in failing_tool\n raise ValueError(\"Tool execution failed\")\nValueError: Tool execution failed\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/Users/sam.brenner/dd/dd-trace-py/ddtrace/contrib/internal/mcp/patch.py\", line 141, in traced_tool_manager_call_tool\n result = await func(*args, **kwargs)\n File \"/Users/sam.brenner/dd/dd-trace-py/.riot/venv_py31013_mock_pytest_pytest-mock_coverage_pytest-cov_opentracing_hypothesis6451_pytest-asyncio_mcp~1100/lib/python3.10/site-packages/mcp/server/fastmcp/tools/tool_manager.py\", line 83, in call_tool\n return await tool.run(arguments, context=context, convert_result=convert_result)\n File \"/Users/sam.brenner/dd/dd-trace-py/.riot/venv_py31013_mock_pytest_pytest-mock_coverage_pytest-cov_opentracing_hypothesis6451_pytest-asyncio_mcp~1100/lib/python3.10/site-packages/mcp/server/fastmcp/tools/base.py\", line 110, in run\n raise ToolError(f\"Error executing tool {self.name}: {e}\") from e\nmcp.server.fastmcp.exceptions.ToolError: Error executing tool failing_tool: Tool execution failed\n",
73+
"error.stack": "Traceback (most recent call last):\n File \"/Users/sam.brenner/dd/dd-trace-py/.riot/venv_py31013_mock_pytest_pytest-mock_coverage_pytest-cov_opentracing_hypothesis6451_pytest-asyncio_mcp~1100/lib/python3.10/site-packages/mcp/server/fastmcp/tools/base.py\", line 98, in run\n result = await self.fn_metadata.call_fn_with_arg_validation(\n File \"/Users/sam.brenner/dd/dd-trace-py/.riot/venv_py31013_mock_pytest_pytest-mock_coverage_pytest-cov_opentracing_hypothesis6451_pytest-asyncio_mcp~1100/lib/python3.10/site-packages/mcp/server/fastmcp/utilities/func_metadata.py\", line 86, in call_fn_with_arg_validation\n return fn(**arguments_parsed_dict)\n File \"/Users/sam.brenner/dd/dd-trace-py/tests/contrib/mcp/conftest.py\", line 101, in failing_tool\n raise ValueError(\"Tool execution failed\")\nValueError: Tool execution failed\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/Users/sam.brenner/dd/dd-trace-py/ddtrace/contrib/internal/mcp/patch.py\", line 153, in traced_tool_manager_call_tool\n result = await func(*args, **kwargs)\n File \"/Users/sam.brenner/dd/dd-trace-py/.riot/venv_py31013_mock_pytest_pytest-mock_coverage_pytest-cov_opentracing_hypothesis6451_pytest-asyncio_mcp~1100/lib/python3.10/site-packages/mcp/server/fastmcp/tools/tool_manager.py\", line 83, in call_tool\n return await tool.run(arguments, context=context, convert_result=convert_result)\n File \"/Users/sam.brenner/dd/dd-trace-py/.riot/venv_py31013_mock_pytest_pytest-mock_coverage_pytest-cov_opentracing_hypothesis6451_pytest-asyncio_mcp~1100/lib/python3.10/site-packages/mcp/server/fastmcp/tools/base.py\", line 110, in run\n raise ToolError(f\"Error executing tool {self.name}: {e}\") from e\nmcp.server.fastmcp.exceptions.ToolError: Error executing tool failing_tool: Tool execution failed\n",
7374
"error.type": "mcp.server.fastmcp.exceptions.ToolError",
74-
"runtime-id": "65f2de5edad8471a88b2032aca2523ab"
75+
"runtime-id": "e2c73207083c4f57b080a3d0d670f79d"
7576
},
7677
"metrics": {
7778
"_dd.measured": 1,
7879
"_dd.top_level": 1,
79-
"process_id": 43046
80+
"process_id": 61410
8081
},
81-
"duration": 1156000,
82-
"start": 1759410684717097000
82+
"duration": 1276000,
83+
"start": 1761572168212203000
8384
}]]

0 commit comments

Comments
 (0)