Merge pull request #19 from video-db/re-summary

Summarize reasoning engine response and misc
video-db · Oct 28, 2024 · ed86d06 · ed86d06
2 parents 7af7605 + 709da1f
commit ed86d06
Show file tree

Hide file tree

Showing 8 changed files with 85 additions and 22 deletions.
diff --git a/Makefile b/Makefile
@@ -71,7 +71,7 @@ run-fe:
 # Start both backend and frontend
+# On Ctrl-C (INT) the trap kills the two background jobs started below.
+# `$$` is required: make would expand a single `$(jobs -p)` itself (to an empty
+# string), leaving the shell with a bare `kill` that stops nothing.
 run:
 	@echo "Starting backend and frontend..."
-	@trap 'kill 0' INT; \
+	@trap 'kill $$(jobs -p)' INT; \
 	($(MAKE) run-be 2>&1 | sed 's/^/[BACKEND] /' ) & \
 	($(MAKE) run-fe 2>&1 | sed 's/^/[FRONTEND] /' ) & \
 	wait
diff --git a/backend/spielberg/agents/download.py b/backend/spielberg/agents/download.py
@@ -37,6 +37,6 @@ def run(
             return AgentResponse(status=AgentStatus.ERROR, message=str(e))
         return AgentResponse(
             status=AgentStatus.SUCCESS,
-            message="Download successful",
+            message="Download successful but not dispalyed, send it in the summary.",
             data=download_response,
         )
diff --git a/backend/spielberg/agents/pricing.py b/backend/spielberg/agents/pricing.py
@@ -142,6 +142,8 @@ def run(self, query: str, *args, **kwargs) -> AgentResponse:
 
         return AgentResponse(
             status=AgentStatus.SUCCESS,
-            message="Fetch successful and output displayed above.",
-            data={},
+            message="Agent run successful",
+            data={
+                "response": llm_response.content
+            },
         )
diff --git a/backend/spielberg/agents/stream_video.py b/backend/spielberg/agents/stream_video.py
@@ -83,5 +83,7 @@ def run(
         return AgentResponse(
             status=AgentStatus.SUCCESS,
             message=f"Agent {self.name} completed successfully.",
-            data={},
+            data={
+                "stream_url": stream_url
+            },
         )
diff --git a/backend/spielberg/agents/thumbnail.py b/backend/spielberg/agents/thumbnail.py
@@ -43,6 +43,7 @@ def run(
         try:
             self.output_message.actions.append("Generating thumbnail..")
             image_content = ImageContent(agent_name=self.agent_name)
+            image_content.status_message = "Generating thumbnail.."
             self.output_message.content.append(image_content)
             self.output_message.push_update()
 

diff --git a/backend/spielberg/agents/summary.py → backend/spielberg/agents/video_summary.py b/backend/spielberg/agents/summary.py → backend/spielberg/agents/video_summary.py
@@ -7,23 +7,26 @@
 
 logger = logging.getLogger(__name__)
 
-SUMMARY_AGENT_PROMPT = """
-Create a comprehensive, in-depth summary that is clear and concise.
-Focus strictly on the main ideas and essential information from the provided text, eliminating any unnecessary language or details.
-"""
 
-
-class SummaryAgent(BaseAgent):
+class VideoSummaryAgent(BaseAgent):
     def __init__(self, session=None, **kwargs):
+        """Configure the video summary agent, then initialise the base agent.
+
+        :param session: Forwarded unchanged to ``BaseAgent.__init__``.
+        :param kwargs: Extra keyword arguments forwarded to ``BaseAgent.__init__``.
+        """
-        self.agent_name = "summary"
-        self.description = "This is an agent to summarize the given video of VideoDB."
+        self.agent_name = "video_summary"
+        self.description = "This is an agent to summarize the given video of VideoDB, if the user wants a certain kind of summary the prompt is required."
         self.llm = OpenAI()
+        # NOTE(review): presumably derived from run()'s signature - confirm in get_parameters.
         self.parameters = self.get_parameters()
         super().__init__(session=session, **kwargs)
 
-    def run(self, collection_id: str, video_id: str) -> AgentResponse:
+    def run(self, collection_id: str, video_id: str, prompt: str) -> AgentResponse:
         """
         Generate summary of the given video.
+
+        :param str collection_id: The collection_id where given video_id is available.
+        :param str video_id: The id of the video to summarize.
+        :param str prompt: Instructions for the summary; appended to the transcript sent to the LLM.
+        :return: The response describing the outcome of the summary generation.
+        :rtype: AgentResponse
+
         """
         try:
             self.output_message.actions.append("Started summary generation..")
@@ -41,7 +44,7 @@ def run(self, collection_id: str, video_id: str) -> AgentResponse:
                 self.output_message.push_update()
                 videodb_tool.index_spoken_words(video_id)
                 transcript_text = videodb_tool.get_transcript(video_id)
-            summary_llm_prompt = f"{SUMMARY_AGENT_PROMPT} {transcript_text}"
+            summary_llm_prompt = f"{transcript_text} {prompt}"
             summary_llm_message = ContextMessage(
                 content=summary_llm_prompt, role=RoleTypes.user
             )
@@ -58,7 +61,7 @@ def run(self, collection_id: str, video_id: str) -> AgentResponse:
             summary = llm_response.content
             output_text_content.text = summary
             output_text_content.status = MsgStatus.success
-            output_text_content.status_message = "Summary generated successfully."
+            output_text_content.status_message = "Here is your summary"
             self.output_message.publish()
         except Exception as e:
             logger.exception(f"Error in {self.agent_name} agent.")

diff --git a/backend/spielberg/core/reasoning.py b/backend/spielberg/core/reasoning.py
@@ -35,6 +35,11 @@
        - 7.3. Perform the initial action which required video id.
     """.strip()
 
+# Appended to the reasoning context as a system message to request the final summary.
+# Keep this text free of literal braces: it is passed through ``str.format``.
+SUMMARIZATION_PROMPT = """
+Generate a succinct summary for the user stating what happened with the agents, based on the agent responses above.
+Agent responses have already been displayed to the user unless a response explicitly says otherwise; in that case, include that response in the summary.
+"""
+
 
 class ReasoningEngine:
     """The Reasoning Engine is the core class that directly interfaces with the user. It interprets natural language input in any conversation and orchestrates agents to fulfill the user's requests. The primary functions of the Reasoning Engine are:
@@ -63,6 +68,7 @@ def __init__(
         self.agents: List[BaseAgent] = []
         self.stop_flag = False
         self.output_message: OutputMessage = self.session.output_message
+        self.summary_content = None
 
     def register_agents(self, agents: List[BaseAgent]):
         """Register an agents.
@@ -110,6 +116,26 @@ def build_context(self):
                 )
             self.session.reasoning_context.append(input_context)
 
+    def get_current_run_context(self):
+        """Return the part of the reasoning context that belongs to the current run.
+
+        The current run starts at the most recent message whose role is
+        ``RoleTypes.user``.
+
+        :return: The messages from that user message to the end of the context,
+            or an empty list when the context holds no user message.
+        """
+        history = self.session.reasoning_context
+        position = len(history)
+        # Scan from the newest message towards the oldest.
+        while position > 0:
+            position -= 1
+            if history[position].role == RoleTypes.user:
+                return history[position:]
+        return []
+
+    def remove_summary_content(self):
+        """Remove the reasoning engine's own entries from the output message.
+
+        Every content item whose ``agent_name`` is ``"reasoning_engine"`` is
+        deleted from ``self.output_message.content``. ``self.summary_content``
+        is reset to ``None`` whenever at least one item is removed.
+        """
+        contents = self.output_message.content
+        # Walk backwards so a deletion never shifts an index still to be visited.
+        for position in reversed(range(len(contents))):
+            if contents[position].agent_name != "reasoning_engine":
+                continue
+            del contents[position]
+            self.summary_content = None
+
+    def add_summary_content(self):
+        """Create the summary placeholder and publish it as in progress.
+
+        A new ``TextContent`` attributed to ``"reasoning_engine"`` is stored on
+        ``self.summary_content``, appended to the output message, marked as in
+        progress and sent out through ``push_update``.
+
+        :return: The newly created summary content.
+        :rtype: TextContent
+        """
+        placeholder = TextContent(agent_name="reasoning_engine")
+        self.summary_content = placeholder
+        self.output_message.content.append(placeholder)
+        placeholder.status_message = "Consolidating outcomes..."
+        placeholder.status = MsgStatus.progress
+        self.output_message.push_update()
+        return placeholder
+
     def run_agent(self, agent_name: str, *args, **kwargs) -> AgentResponse:
         """Run an agent with the given name and arguments.
 
@@ -166,6 +192,9 @@ def step(self):
                 break
 
             if llm_response.tool_calls:
+                if self.summary_content:
+                    self.remove_summary_content()
+
                 self.session.reasoning_context.append(
                     ContextMessage(
                         content=llm_response.content,
@@ -189,6 +218,9 @@ def step(self):
                     print(agent_response, "\n\n")
                     status = agent_response.status
 
+            if not self.summary_content:
+                self.add_summary_content()
+
             if (
                 llm_response.finish_reason == "stop"
                 or llm_response.finish_reason == "end_turn"
@@ -200,10 +232,33 @@ def step(self):
                         role=RoleTypes.assistant,
                     )
                 )
-                text_content = TextContent(text=llm_response.content)
-                text_content.status = MsgStatus.success
-                text_content.status_message = "Here is the summary of the response"
-                self.output_message.content.append(text_content)
+                if self.iterations == self.max_iterations - 1:
+                    # Direct response case
+                    self.summary_content.status_message = (
+                        "Here is the the response"
+                    )
+                    self.summary_content.text = llm_response.content
+                    self.summary_content.status = MsgStatus.success
+                else:
+                    self.session.reasoning_context.append(
+                        ContextMessage(
+                            content=SUMMARIZATION_PROMPT.format(
+                                query=self.input_message.content
+                            ),
+                            role=RoleTypes.system,
+                        )
+                    )
+                    summary_response = self.llm.chat_completions(
+                        messages=[
+                            message.to_llm_msg()
+                            for message in self.get_current_run_context()
+                        ]
+                    )
+                    self.summary_content.text = summary_response.content
+                    self.summary_content.status = MsgStatus.success
+                    self.summary_content.status_message = (
+                        "Here is the summary of the run"
+                    )
                 self.output_message.status = MsgStatus.success
                 self.output_message.publish()
                 print("-" * 40, "Stopping", "-" * 40)

diff --git a/backend/spielberg/handler.py b/backend/spielberg/handler.py
@@ -4,7 +4,7 @@
 from dotenv import dotenv_values
 
 from spielberg.agents.thumbnail import ThumbnailAgent
-from spielberg.agents.summary import SummaryAgent
+from spielberg.agents.video_summary import VideoSummaryAgent
 from spielberg.agents.download import DownloadAgent
 from spielberg.agents.pricing import PricingAgent
 from spielberg.agents.upload import UploadAgent
@@ -32,7 +32,7 @@ def __init__(self, db, **kwargs):
         # Register the agents here
         self.agents = [
             ThumbnailAgent,
-            SummaryAgent,
+            VideoSummaryAgent,
             DownloadAgent,
             PricingAgent,
             UploadAgent,