letta-ai · rohit-rptless · Aug 20, 2024 · Aug 20, 2024 · Aug 20, 2024 · cpacker
diff --git a/memgpt/agent_store/db.py b/memgpt/agent_store/db.py
@@ -156,6 +156,7 @@ class MessageModel(Base):
             # openai info
             role = Column(String, nullable=False)
             text = Column(String)  # optional: can be null if function call
+            mm_content = Column(JSON)  # optional: multi-modal input
             model = Column(String)  # optional: can be null if LLM backend doesn't require specifying
             name = Column(String)  # optional: multi-agent only
 
@@ -192,6 +193,7 @@ def to_record(self):
                     role=self.role,
                     name=self.name,
                     text=self.text,
+                    mm_content=self.mm_content,
                     model=self.model,
                     # tool_calls=[ToolCall(id=tool_call["id"], function=ToolCallFunction(**tool_call["function"])) for tool_call in self.tool_calls] if self.tool_calls else None,
                     tool_calls=self.tool_calls,

diff --git a/memgpt/schemas/message.py b/memgpt/schemas/message.py
@@ -62,6 +62,8 @@ class Message(BaseMessage):
     id: str = BaseMessage.generate_id_field()
     role: MessageRole = Field(..., description="The role of the participant.")
     text: str = Field(..., description="The text of the message.")
+    # Optional multi-modal content parts. Only read when role is 'user', where it replaces `text` as the message content; each dict is expected to follow the MultiModalMessage schema.
+    mm_content: Optional[List[dict]] = Field(None, description="Multi modal content entered by the user.")
     user_id: str = Field(None, description="The unique identifier of the user.")
     agent_id: str = Field(None, description="The unique identifier of the agent.")
     model: Optional[str] = Field(None, description="The model used to make the function call.")
@@ -223,8 +225,9 @@ def to_openai_dict(
 
         elif self.role == "user":
             assert all([v is not None for v in [self.text, self.role]]), vars(self)
+            content = self.mm_content if self.mm_content is not None else self.text
             openai_message = {
-                "content": self.text,
+                "content": content,
                 "role": self.role,
             }
             # Optional field, do not include if null

diff --git a/memgpt/schemas/openai/chat_completion_request.py b/memgpt/schemas/openai/chat_completion_request.py
@@ -8,13 +8,24 @@ class SystemMessage(BaseModel):
     role: str = "system"
     name: Optional[str] = None
 
+
+class MultiModalMessage(BaseModel):
+    """One content part of a multi-modal user message (OpenAI content-part layout)."""
+
+    # Part discriminator, e.g. "text" or "image_url".
+    type: str
+    # Payload of a text part; absent on image parts.
+    text: Optional[str] = None
+    # Image reference: a bare URL string, or an object such as {"url": ..., "detail": ...}.
+    image_url: Optional[Union[str, dict]] = None
+
 
 class UserMessage(BaseModel):
-    content: Union[str, List[str]]
+    content: Union[str, List[MultiModalMessage]]
     role: str = "user"
     name: Optional[str] = None
 
 
 class ToolCallFunction(BaseModel):
     name: str
     arguments: str