Added Audio to FastMCP (#1130)

dragonier23 · web-flow · commit f4b2957a20fa · 2025-08-22T11:45:38.000+01:00
diff --git a/src/mcp/server/fastmcp/__init__.py b/src/mcp/server/fastmcp/__init__.py
@@ -3,7 +3,7 @@
 from importlib.metadata import version
 
 from .server import Context, FastMCP
-from .utilities.types import Image
+from .utilities.types import Audio, Image
 
 __version__ = version("mcp")
-__all__ = ["FastMCP", "Context", "Image"]
+__all__ = ["FastMCP", "Context", "Image", "Audio"]
diff --git a/src/mcp/server/fastmcp/utilities/func_metadata.py b/src/mcp/server/fastmcp/utilities/func_metadata.py
@@ -21,7 +21,7 @@
 
 from mcp.server.fastmcp.exceptions import InvalidSignature
 from mcp.server.fastmcp.utilities.logging import get_logger
-from mcp.server.fastmcp.utilities.types import Image
+from mcp.server.fastmcp.utilities.types import Audio, Image
 from mcp.types import ContentBlock, TextContent
 
 logger = get_logger(__name__)
@@ -506,6 +506,9 @@ def _convert_to_content(
     if isinstance(result, Image):
         return [result.to_image_content()]
 
+    if isinstance(result, Audio):
+        return [result.to_audio_content()]
+
     if isinstance(result, list | tuple):
         return list(
             chain.from_iterable(
diff --git a/src/mcp/server/fastmcp/utilities/types.py b/src/mcp/server/fastmcp/utilities/types.py
@@ -3,7 +3,7 @@
 import base64
 from pathlib import Path
 
-from mcp.types import ImageContent
+from mcp.types import AudioContent, ImageContent
 
 
 class Image:
@@ -52,3 +52,50 @@ def to_image_content(self) -> ImageContent:
             raise ValueError("No image data available")
 
         return ImageContent(type="image", data=data, mimeType=self._mime_type)
+
+
+class Audio:
+    """Helper class for returning audio from tools.
+
+    Args:
+        path: Audio file; it is read when the content block is built.
+        data: Raw audio bytes, used instead of ``path``.
+        format: Audio subtype such as ``"wav"``; takes precedence over the file suffix.
+
+    Raises:
+        ValueError: If both or neither of ``path`` and ``data`` are given (empty values count as missing).
+    """
+
+    def __init__(self, path: str | Path | None = None, data: bytes | None = None, format: str | None = None) -> None:
+        if bool(path) == bool(data):  # both or neither supplied (truthiness: "" and b"" count as missing)
+            raise ValueError("Exactly one of path or data must be provided")
+
+        self.path = Path(path) if path else None
+        self.data = data
+        self._format = format
+        self._mime_type = self._get_mime_type()
+
+    def _get_mime_type(self) -> str:
+        """Return the MIME type: explicit format, else file suffix, else ``audio/wav`` for raw data."""
+        if self._format:
+            return f"audio/{self._format.lower()}"
+        if self.path:
+            return {
+                ".wav": "audio/wav",
+                ".mp3": "audio/mpeg",
+                ".ogg": "audio/ogg",
+                ".flac": "audio/flac",
+                ".aac": "audio/aac",
+                ".m4a": "audio/mp4",
+            }.get(self.path.suffix.lower(), "application/octet-stream")  # unrecognized suffix
+        return "audio/wav"  # default for raw binary data
+
+    def to_audio_content(self) -> AudioContent:
+        """Convert to MCP AudioContent with base64-encoded data; raises ValueError if no source is set."""
+        if self.path:
+            raw = self.path.read_bytes()
+        elif self.data is not None:
+            raw = self.data
+        else:
+            raise ValueError("No audio data available")
+        return AudioContent(type="audio", data=base64.b64encode(raw).decode(), mimeType=self._mime_type)
diff --git a/tests/server/fastmcp/test_server.py b/tests/server/fastmcp/test_server.py
@@ -10,7 +10,7 @@
 from mcp.server.fastmcp import Context, FastMCP
 from mcp.server.fastmcp.prompts.base import Message, UserMessage
 from mcp.server.fastmcp.resources import FileResource, FunctionResource
-from mcp.server.fastmcp.utilities.types import Image
+from mcp.server.fastmcp.utilities.types import Audio, Image
 from mcp.server.session import ServerSession
 from mcp.shared.exceptions import McpError
 from mcp.shared.memory import (
@@ -195,6 +195,10 @@ def image_tool_fn(path: str) -> Image:
     return Image(path)
 
 
+def audio_tool_fn(path: str) -> Audio:  # tool fixture: wraps the given file path in an Audio helper
+    return Audio(path)
+
+
 def mixed_content_tool_fn() -> list[ContentBlock]:
     return [
         TextContent(type="text", text="Hello"),
@@ -300,6 +304,60 @@ async def test_tool_image_helper(self, tmp_path: Path):
             # Check structured content - Image return type should NOT have structured output
             assert result.structuredContent is None
 
+    @pytest.mark.anyio
+    async def test_tool_audio_helper(self, tmp_path: Path):
+        # Create a test audio file; the .wav suffix is what the audio/wav assertion below relies on
+        audio_path = tmp_path / "test.wav"
+        audio_path.write_bytes(b"fake wav data")
+
+        mcp = FastMCP()
+        mcp.add_tool(audio_tool_fn)
+        async with client_session(mcp._mcp_server) as client:
+            result = await client.call_tool("audio_tool_fn", {"path": str(audio_path)})
+            assert len(result.content) == 1
+            content = result.content[0]
+            assert isinstance(content, AudioContent)
+            assert content.type == "audio"
+            assert content.mimeType == "audio/wav"
+            # Verify base64 encoding round-trips to the bytes written above
+            decoded = base64.b64decode(content.data)
+            assert decoded == b"fake wav data"
+            # Check structured content - Audio return type should NOT have structured output
+            assert result.structuredContent is None
+
+    @pytest.mark.parametrize(
+        "filename,expected_mime_type",
+        [
+            ("test.wav", "audio/wav"),
+            ("test.mp3", "audio/mpeg"),
+            ("test.ogg", "audio/ogg"),
+            ("test.flac", "audio/flac"),
+            ("test.aac", "audio/aac"),
+            ("test.m4a", "audio/mp4"),
+            ("test.unknown", "application/octet-stream"),  # Unknown extension falls back to a generic binary type
+        ],
+    )
+    @pytest.mark.anyio
+    async def test_tool_audio_suffix_detection(self, tmp_path: Path, filename: str, expected_mime_type: str):
+        """Test that the Audio helper maps each file suffix to its MIME type, including the unknown-suffix fallback."""
+        mcp = FastMCP()
+        mcp.add_tool(audio_tool_fn)
+
+        # Create a test audio file with the parametrized extension (the content itself is arbitrary bytes)
+        audio_path = tmp_path / filename
+        audio_path.write_bytes(b"fake audio data")
+
+        async with client_session(mcp._mcp_server) as client:
+            result = await client.call_tool("audio_tool_fn", {"path": str(audio_path)})
+            assert len(result.content) == 1
+            content = result.content[0]
+            assert isinstance(content, AudioContent)
+            assert content.type == "audio"
+            assert content.mimeType == expected_mime_type
+            # Verify base64 encoding round-trips to the bytes written above
+            decoded = base64.b64decode(content.data)
+            assert decoded == b"fake audio data"
+
     @pytest.mark.anyio
     async def test_tool_mixed_content(self):
         mcp = FastMCP()
@@ -332,19 +390,24 @@ async def test_tool_mixed_content(self):
                     assert structured_result[i][key] == value
 
     @pytest.mark.anyio
-    async def test_tool_mixed_list_with_image(self, tmp_path: Path):
+    async def test_tool_mixed_list_with_audio_and_image(self, tmp_path: Path):
         """Test that lists containing Image objects and other types are handled
         correctly"""
         # Create a test image
         image_path = tmp_path / "test.png"
         image_path.write_bytes(b"test image data")
 
+        # Create a test audio
+        audio_path = tmp_path / "test.wav"
+        audio_path.write_bytes(b"test audio data")
+
         # TODO(Marcelo): It seems if we add the proper type hint, it generates an invalid JSON schema.
         # We need to fix this.
         def mixed_list_fn() -> list:  # type: ignore
             return [  # type: ignore
                 "text message",
                 Image(image_path),
+                Audio(audio_path),
                 {"key": "value"},
                 TextContent(type="text", text="direct content"),
             ]
@@ -353,7 +416,7 @@ def mixed_list_fn() -> list:  # type: ignore
         mcp.add_tool(mixed_list_fn)  # type: ignore
         async with client_session(mcp._mcp_server) as client:
             result = await client.call_tool("mixed_list_fn", {})
-            assert len(result.content) == 4
+            assert len(result.content) == 5
             # Check text conversion
             content1 = result.content[0]
             assert isinstance(content1, TextContent)
@@ -363,14 +426,19 @@ def mixed_list_fn() -> list:  # type: ignore
             assert isinstance(content2, ImageContent)
             assert content2.mimeType == "image/png"
             assert base64.b64decode(content2.data) == b"test image data"
-            # Check dict conversion
+            # Check audio conversion
             content3 = result.content[2]
-            assert isinstance(content3, TextContent)
-            assert '"key": "value"' in content3.text
-            # Check direct TextContent
+            assert isinstance(content3, AudioContent)
+            assert content3.mimeType == "audio/wav"
+            assert base64.b64decode(content3.data) == b"test audio data"
+            # Check dict conversion
             content4 = result.content[3]
             assert isinstance(content4, TextContent)
-            assert content4.text == "direct content"
+            assert '"key": "value"' in content4.text
+            # Check direct TextContent
+            content5 = result.content[4]
+            assert isinstance(content5, TextContent)
+            assert content5.text == "direct content"
             # Check structured content - untyped list with Image objects should NOT have structured output
             assert result.structuredContent is None