Skip to content

Commit f53e50c

Browse files
committed
Add FileContent class with file handling support for Response API
- Add FileContent class for handling file content with metadata
- Enhance ChatMessageContent to support FileContent integration
- Add comprehensive test coverage for file content functionality
- Rename from_file_path to from_file method for better API consistency
1 parent 19557eb commit f53e50c

File tree

8 files changed

+536
-97
lines changed

8 files changed

+536
-97
lines changed

python/semantic_kernel/agents/open_ai/responses_agent_thread_actions.py

Lines changed: 195 additions & 58 deletions
Large diffs are not rendered by default.

python/semantic_kernel/contents/__init__.py

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,19 @@
44
from semantic_kernel.contents.audio_content import AudioContent
55
from semantic_kernel.contents.chat_history import ChatHistory
66
from semantic_kernel.contents.chat_message_content import ChatMessageContent
7+
from semantic_kernel.contents.file_content import FileContent
78
from semantic_kernel.contents.file_reference_content import FileReferenceContent
89
from semantic_kernel.contents.function_call_content import FunctionCallContent
910
from semantic_kernel.contents.function_result_content import FunctionResultContent
10-
from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer
11-
from semantic_kernel.contents.history_reducer.chat_history_summarization_reducer import ChatHistorySummarizationReducer
12-
from semantic_kernel.contents.history_reducer.chat_history_truncation_reducer import ChatHistoryTruncationReducer
11+
from semantic_kernel.contents.history_reducer.chat_history_reducer import (
12+
ChatHistoryReducer,
13+
)
14+
from semantic_kernel.contents.history_reducer.chat_history_summarization_reducer import (
15+
ChatHistorySummarizationReducer,
16+
)
17+
from semantic_kernel.contents.history_reducer.chat_history_truncation_reducer import (
18+
ChatHistoryTruncationReducer,
19+
)
1320
from semantic_kernel.contents.image_content import ImageContent
1421
from semantic_kernel.contents.realtime_events import (
1522
RealtimeAudioEvent,
@@ -20,9 +27,15 @@
2027
RealtimeImageEvent,
2128
RealtimeTextEvent,
2229
)
23-
from semantic_kernel.contents.streaming_annotation_content import StreamingAnnotationContent
24-
from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
25-
from semantic_kernel.contents.streaming_file_reference_content import StreamingFileReferenceContent
30+
from semantic_kernel.contents.streaming_annotation_content import (
31+
StreamingAnnotationContent,
32+
)
33+
from semantic_kernel.contents.streaming_chat_message_content import (
34+
StreamingChatMessageContent,
35+
)
36+
from semantic_kernel.contents.streaming_file_reference_content import (
37+
StreamingFileReferenceContent,
38+
)
2639
from semantic_kernel.contents.streaming_text_content import StreamingTextContent
2740
from semantic_kernel.contents.text_content import TextContent
2841
from semantic_kernel.contents.utils.author_role import AuthorRole
@@ -37,6 +50,7 @@
3750
"ChatHistorySummarizationReducer",
3851
"ChatHistoryTruncationReducer",
3952
"ChatMessageContent",
53+
"FileContent",
4054
"FileReferenceContent",
4155
"FinishReason",
4256
"FunctionCallContent",

python/semantic_kernel/contents/chat_message_content.py

Lines changed: 54 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
ANNOTATION_CONTENT_TAG,
1717
CHAT_MESSAGE_CONTENT_TAG,
1818
DISCRIMINATOR_FIELD,
19+
FILE_CONTENT_TAG,
1920
FILE_REFERENCE_CONTENT_TAG,
2021
FUNCTION_CALL_CONTENT_TAG,
2122
FUNCTION_RESULT_CONTENT_TAG,
@@ -26,12 +27,17 @@
2627
ContentTypes,
2728
)
2829
from semantic_kernel.contents.file_reference_content import FileReferenceContent
30+
from semantic_kernel.contents.file_content import FileContent
2931
from semantic_kernel.contents.function_call_content import FunctionCallContent
3032
from semantic_kernel.contents.function_result_content import FunctionResultContent
3133
from semantic_kernel.contents.image_content import ImageContent
3234
from semantic_kernel.contents.kernel_content import KernelContent
33-
from semantic_kernel.contents.streaming_annotation_content import StreamingAnnotationContent
34-
from semantic_kernel.contents.streaming_file_reference_content import StreamingFileReferenceContent
35+
from semantic_kernel.contents.streaming_annotation_content import (
36+
StreamingAnnotationContent,
37+
)
38+
from semantic_kernel.contents.streaming_file_reference_content import (
39+
StreamingFileReferenceContent,
40+
)
3541
from semantic_kernel.contents.text_content import TextContent
3642
from semantic_kernel.contents.utils.author_role import AuthorRole
3743
from semantic_kernel.contents.utils.finish_reason import FinishReason
@@ -48,6 +54,7 @@
4854
IMAGE_CONTENT_TAG: ImageContent,
4955
STREAMING_FILE_REFERENCE_CONTENT_TAG: StreamingFileReferenceContent,
5056
STREAMING_ANNOTATION_CONTENT_TAG: StreamingAnnotationContent,
57+
FILE_CONTENT_TAG: FileContent,
5158
}
5259

5360
CMC_ITEM_TYPES = Annotated[
@@ -58,6 +65,7 @@
5865
| FunctionResultContent
5966
| FunctionCallContent
6067
| FileReferenceContent
68+
| FileContent
6169
| StreamingAnnotationContent
6270
| StreamingFileReferenceContent
6371
| AudioContent,
@@ -239,7 +247,13 @@ def to_element(self) -> "Element":
239247
"""
240248
root = Element(self.tag)
241249
for field in self.model_fields_set:
242-
if field not in ["role", "name", "encoding", "finish_reason", "ai_model_id"]:
250+
if field not in [
251+
"role",
252+
"name",
253+
"encoding",
254+
"finish_reason",
255+
"ai_model_id",
256+
]:
243257
continue
244258
value = getattr(self, field)
245259
if isinstance(value, Enum):
@@ -260,15 +274,22 @@ def from_element(cls, element: Element) -> "ChatMessageContent":
260274
ChatMessageContent - The new instance of ChatMessageContent or a subclass.
261275
"""
262276
if element.tag != cls.tag:
263-
raise ContentInitializationError(f"Element tag is not {cls.tag}") # pragma: no cover
277+
raise ContentInitializationError(
278+
f"Element tag is not {cls.tag}"
279+
) # pragma: no cover
264280
kwargs: dict[str, Any] = {key: value for key, value in element.items()}
265281
items: list[KernelContent] = []
266282
if element.text:
267283
items.append(TextContent(text=unescape(element.text)))
268284
for child in element:
269285
if child.tag not in TAG_CONTENT_MAP:
270-
logger.warning('Unknown tag "%s" in ChatMessageContent, treating as text', child.tag)
271-
text = ElementTree.tostring(child, encoding="unicode", short_empty_elements=False)
286+
logger.warning(
287+
'Unknown tag "%s" in ChatMessageContent, treating as text',
288+
child.tag,
289+
)
290+
text = ElementTree.tostring(
291+
child, encoding="unicode", short_empty_elements=False
292+
)
272293
items.append(TextContent(text=unescape(text) or ""))
273294
else:
274295
items.append(TAG_CONTENT_MAP[child.tag].from_element(child)) # type: ignore
@@ -294,9 +315,13 @@ def to_prompt(self) -> str:
294315
str - The prompt from the ChatMessageContent.
295316
"""
296317
root = self.to_element()
297-
return ElementTree.tostring(root, encoding=self.encoding or "unicode", short_empty_elements=False)
318+
return ElementTree.tostring(
319+
root, encoding=self.encoding or "unicode", short_empty_elements=False
320+
)
298321

299-
def to_dict(self, role_key: str = "role", content_key: str = "content") -> dict[str, Any]:
322+
def to_dict(
323+
self, role_key: str = "role", content_key: str = "content"
324+
) -> dict[str, Any]:
300325
"""Serialize the ChatMessageContent to a dictionary.
301326
302327
Returns:
@@ -305,8 +330,14 @@ def to_dict(self, role_key: str = "role", content_key: str = "content") -> dict[
305330
ret: dict[str, Any] = {
306331
role_key: self.role.value,
307332
}
308-
if self.role == AuthorRole.ASSISTANT and any(isinstance(item, FunctionCallContent) for item in self.items):
309-
ret["tool_calls"] = [item.to_dict() for item in self.items if isinstance(item, FunctionCallContent)]
333+
if self.role == AuthorRole.ASSISTANT and any(
334+
isinstance(item, FunctionCallContent) for item in self.items
335+
):
336+
ret["tool_calls"] = [
337+
item.to_dict()
338+
for item in self.items
339+
if isinstance(item, FunctionCallContent)
340+
]
310341
else:
311342
ret[content_key] = self._parse_items()
312343
if self.role == AuthorRole.TOOL:
@@ -330,5 +361,16 @@ def _parse_items(self) -> str | list[dict[str, Any]]:
330361

331362
def __hash__(self) -> int:
332363
"""Return the hash of the chat message content."""
333-
hashable_items = [make_hashable(item) for item in self.items] if self.items else []
334-
return hash((self.tag, self.role, self.content, self.encoding, self.finish_reason, *hashable_items))
364+
hashable_items = (
365+
[make_hashable(item) for item in self.items] if self.items else []
366+
)
367+
return hash(
368+
(
369+
self.tag,
370+
self.role,
371+
self.content,
372+
self.encoding,
373+
self.finish_reason,
374+
*hashable_items,
375+
)
376+
)

python/semantic_kernel/contents/const.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
IMAGE_CONTENT_TAG: Final[str] = "image"
1010
ANNOTATION_CONTENT_TAG: Final[str] = "annotation"
1111
STREAMING_ANNOTATION_CONTENT_TAG: Final[str] = "streaming_annotation"
12+
FILE_CONTENT_TAG: Final[str] = "file"
1213
BINARY_CONTENT_TAG: Final[str] = "binary"
1314
FILE_REFERENCE_CONTENT_TAG: Final[str] = "file_reference"
1415
STREAMING_FILE_REFERENCE_CONTENT_TAG: Final[str] = "streaming_file_reference"
@@ -25,6 +26,7 @@ class ContentTypes(str, Enum):
2526
BINARY_CONTENT = BINARY_CONTENT_TAG
2627
CHAT_MESSAGE_CONTENT = CHAT_MESSAGE_CONTENT_TAG
2728
IMAGE_CONTENT = IMAGE_CONTENT_TAG
29+
FILE_CONTENT = FILE_CONTENT_TAG
2830
FILE_REFERENCE_CONTENT = FILE_REFERENCE_CONTENT_TAG
2931
FUNCTION_CALL_CONTENT = FUNCTION_CALL_CONTENT_TAG
3032
FUNCTION_RESULT_CONTENT = FUNCTION_RESULT_CONTENT_TAG
python/semantic_kernel/contents/file_content.py

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
# Copyright (c) Microsoft. All rights reserved.
2+
3+
import logging
4+
import mimetypes
5+
import os
6+
from typing import Any, ClassVar, Literal, TypeVar
7+
from numpy import ndarray
8+
from pydantic import Field
9+
from semantic_kernel.contents.binary_content import BinaryContent
10+
from semantic_kernel.contents.const import ContentTypes, FILE_CONTENT_TAG
11+
from semantic_kernel.utils.feature_stage_decorator import experimental
12+
13+
logger = logging.getLogger(__name__)
14+
15+
_T = TypeVar("_T", bound="FileContent")
16+
17+
18+
@experimental
class FileContent(BinaryContent):
    """File content: binary content that also carries a filename.

    An instance can be created from raw data, a data URI, a plain URI, or
    from a path on disk via :meth:`from_file`. The filename and mime_type
    are required for correct serialization.
    """

    content_type: Literal[ContentTypes.FILE_CONTENT] = Field(FILE_CONTENT_TAG, init=False)  # type: ignore
    tag: ClassVar[str] = FILE_CONTENT_TAG
    filename: str | None = None

    def __init__(
        self,
        filename: str | None = None,
        uri: str | None = None,
        data_uri: str | None = None,
        data: str | bytes | ndarray | None = None,
        data_format: str | None = None,
        mime_type: str | None = None,
        **kwargs: Any,
    ):
        """Create a FileContent.

        Args:
            filename: Name of the file. When omitted and ``uri`` is given, the
                last path component of ``uri`` is used instead.
            uri: Reference to the file; forwarded to ``BinaryContent``.
            data_uri: Data URI holding the content; forwarded to ``BinaryContent``.
            data: The file content; forwarded to ``BinaryContent``.
            data_format: Format of ``data``. Defaults to ``"base64"`` whenever
                ``data`` is supplied without an explicit format.
            mime_type: Mime type of the content; forwarded to ``BinaryContent``.
            **kwargs: Any additional arguments, forwarded to ``BinaryContent``.
        """
        # Always use base64 for file data for consistency with serialization
        if data is not None and data_format is None:
            data_format = "base64"
        super().__init__(
            uri=uri,
            data_uri=data_uri,
            data=data,
            data_format=data_format,
            mime_type=mime_type,
            **kwargs,
        )
        self.filename = filename
        if self.filename is None and uri is not None:
            self.filename = os.path.basename(uri)

    @classmethod
    def from_file(cls: type[_T], file_path: str, **kwargs: Any) -> _T:
        """Create a FileContent by reading a file from disk.

        Args:
            file_path: Path of the file to read (opened in binary mode).
            **kwargs: Extra keyword arguments for the constructor. ``filename``,
                ``mime_type`` and ``data_format`` supplied here take precedence
                over the values derived from ``file_path``.

        Returns:
            A new instance holding the file's bytes, its base name as filename
            and the mime type guessed from the path (``None`` when the type
            cannot be guessed).

        Raises:
            OSError: If the file cannot be opened or read.
        """
        with open(file_path, "rb") as f:
            data = f.read()
        # setdefault (rather than passing these explicitly next to **kwargs) lets
        # callers override a value without triggering a duplicate-keyword TypeError.
        kwargs.setdefault("filename", os.path.basename(file_path))
        kwargs.setdefault("mime_type", mimetypes.guess_type(file_path)[0])
        # Always use base64 for file data for consistency with serialization
        kwargs.setdefault("data_format", "base64")
        return cls(data=data, **kwargs)

    def __str__(self) -> str:
        """Return the content as a base64 data URI, else the URI, else ``""``."""
        import base64

        # NOTE(review): if BinaryContent.data yields an empty value instead of None
        # when no data was supplied, the uri fallback below is never reached - confirm.
        if self.data is not None and self.mime_type is not None:
            encoded = base64.b64encode(self.data).decode("ascii")
            return f"data:{self.mime_type};base64,{encoded}"
        if self.uri is not None:
            return str(self.uri)
        return ""

    def to_element(self) -> Any:
        """Return the same ``input_file`` mapping as :meth:`to_dict`.

        NOTE(review): this returns a plain dict, not an XML element - confirm that
        callers which build XML from content items can handle that.
        """
        return self.to_dict()

    @classmethod
    def from_element(cls: type[_T], element: Any) -> _T:
        """Create a FileContent from an object exposing ``get("file_data")``/``get("filename")``.

        Args:
            element: Any object with a ``get`` method, for example the dict
                produced by :meth:`to_element`.

        Returns:
            A new instance. A ``data:<mime>;base64,<payload>`` value is decoded
            into data and mime type; any other non-empty ``file_data`` is kept
            as the ``uri`` (the form :meth:`__str__` emits for URI-only content);
            a missing or unparseable value yields an instance with only the
            filename set.
        """
        import base64
        import re

        file_data = element.get("file_data")
        filename = element.get("filename")
        if not file_data:
            return cls(filename=filename)
        if not file_data.startswith("data:"):
            return cls(filename=filename, uri=file_data)
        # Example: data:application/pdf;base64,....
        # DOTALL so that a line-wrapped payload is captured in full rather than
        # being cut off at the first newline.
        match = re.match(r"data:([^;]+);base64,(.*)", file_data, re.DOTALL)
        if match is None:
            return cls(filename=filename)
        return cls(
            filename=filename,
            data=base64.b64decode(match.group(2)),
            mime_type=match.group(1),
            data_format="base64",
        )

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a dict with ``type``, ``filename`` and ``file_data`` keys."""
        return {"type": "input_file", "filename": self.filename, "file_data": str(self)}

python/tests/assets/sample_file.pdf

617 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)