Merge branch 'main' into ankit/refactor

video-db · Oct 23, 2024 · 9150c03 · 9150c03
2 parents cbf4892 + d977dc1
commit 9150c03
Show file tree

Hide file tree

Showing 18 changed files with 245 additions and 58 deletions.
diff --git a/backend/spielberg/agents/upload.py b/backend/spielberg/agents/upload.py
@@ -21,6 +21,10 @@
             "type": "string",
             "description": "URL to upload the content",
         },
+        "name": {
+            "type": "string",
+            "description": "Name of the content to upload",
+        },
         "media_type": {
             "type": "string",
             "enum": ["video", "audio", "image"],
@@ -42,7 +46,7 @@ def __init__(self, session: Session, **kwargs):
         self.parameters = UPLOAD_AGENT_PARAMETERS
         super().__init__(session=session, **kwargs)
 
-    def _upload(self, url: str, media_type: str):
+    def _upload(self, url: str, media_type: str, name: str):
         """Upload the media with the given URL."""
         try:
             if media_type == "video":
@@ -57,7 +61,7 @@ def _upload(self, url: str, media_type: str):
             content.status_message = f"Uploading {media_type}..."
             self.output_message.push_update()
 
-            upload_data = self.videodb_tool.upload(url, media_type)
+            upload_data = self.videodb_tool.upload(url, media_type, name=name)
 
             content.status_message = f"{upload_data['name']} uploaded successfully"
             if media_type == "video":
@@ -119,7 +123,13 @@ def _upload_yt_playlist(self, playlist_info: dict, media_type):
         )
 
     def run(
-        self, url: str, media_type="video", collection_id: str = None, *args, **kwargs
+        self,
+        url: str,
+        media_type="video",
+        collection_id: str = None,
+        name: str = None,
+        *args,
+        **kwargs,
     ) -> AgentResponse:
         """
         Upload the media with the given URL.
@@ -146,3 +156,4 @@ def run(
 
         # upload the media
         return self._upload(url, media_type)
+
diff --git a/backend/spielberg/core/reasoning.py b/backend/spielberg/core/reasoning.py
@@ -37,7 +37,13 @@
 
 
 class ReasoningEngine:
-    """The ReasoningEngine class."""
+    """The Reasoning Engine is the core class that directly interfaces with the user. It interprets natural language input in any conversation and orchestrates agents to fulfill the user's requests. The primary functions of the Reasoning Engine are:
+
+    * Maintain Context of Conversational History: Manage memory, context limits, input, and output experiences to ensure coherent and context-aware interactions.
+    * Natural Language Understanding (NLU): Uses LLMs of your choice to understand the task.
+    * Intelligent Reference Deduction: Intelligently deduce references to previous messages, outputs, files, agents, etc., to provide relevant and accurate responses.
+    * Agent Orchestration: Decide on agents and their workflows to fulfill requests. Multiple strategies can be employed to create agent workflows, such as step-by-step processes or chaining of agents provided by default.
+    * Final Control Over Conversation Flow: Maintain ultimate control over the flow of conversation with the user, ensuring coherence and goal alignment."""
 
     def __init__(
         self,
@@ -46,8 +52,8 @@ def __init__(
     ):
         """Initialize the ReasoningEngine.
 
-        :param InputMessage input_message: The input message to the reasoning engine
-        :param Session session: The session instance
+        :param input_message: The input message to the reasoning engine.
+        :param session: The session instance.
         """
         self.input_message = input_message
         self.session = session
@@ -61,7 +67,7 @@ def __init__(
     def register_agents(self, agents: List[BaseAgent]):
         """Register agents.
 
-        :param List[BaseAgent] agents: The list of agents to register
+        :param agents: The list of agents to register.
         """
         self.agents.extend(agents)
 
@@ -110,8 +116,7 @@ def run_agent(self, agent_name: str, *args, **kwargs) -> AgentResponse:
         :param str agent_name: The name of the agent to run
         :param args: The arguments to pass to the agent
         :param kwargs: The keyword arguments to pass to the agent
-        :return: :class:`AgentResponse` instance
-        :rtype: AgentResponse
+        :return: The response from the agent
         """
         print("-" * 40, f"Running {agent_name} Agent", "-" * 40)
         print(kwargs, "\n\n")
@@ -208,7 +213,7 @@ def step(self):
     def run(self, max_iterations: int = None):
         """Run the reasoning engine.
 
-        :param int max_iterations: (optional) The number of max_iterations to run the reasoning engine
+        :param int max_iterations: The maximum number of iterations to run the reasoning engine
         """
         self.iterations = max_iterations or self.max_iterations
         self.build_context()

diff --git a/backend/spielberg/core/session.py b/backend/spielberg/core/session.py
@@ -102,7 +102,7 @@ class BaseMessage(BaseModel):
     :param str session_id: Session id of the messages
     :param str conv_id: Conversation id
     :param int msg_id: (optional) Message id
-    :param MsgType msg_type: (optional) :class:`MsgType` of the message
+    :param msg_type: Type of the message
     """
 
     model_config = ConfigDict(
@@ -126,7 +126,11 @@ class BaseMessage(BaseModel):
 
 
 class InputMessage(BaseMessage):
-    """Input message to the agent"""
+    """Input message to the agent
+    
+    :param BaseDB db: Database instance
+    :param MsgType msg_type: :class:`MsgType` of the message
+    """
 
     db: BaseDB
     msg_type: MsgType = MsgType.input

diff --git a/backend/spielberg/tools/videodb_tool.py b/backend/spielberg/tools/videodb_tool.py
@@ -64,16 +64,20 @@ def get_videos(self):
             for video in videos
         ]
 
-    def upload(self, url, media_type):
-        media = self.conn.upload(url=url, media_type=media_type)
+    def upload(self, url, media_type, name=None):
+        if name is None:
+            media = self.conn.upload(url=url, media_type=media_type)
+            name = media.name
+        else:
+            media = self.conn.upload(url=url, media_type=media_type, name=name)
 
         if media_type == "video":
             return {
                 "id": media.id,
                 "collection_id": media.collection_id,
                 "stream_url": media.stream_url,
                 "player_url": media.player_url,
-                "name": media.name,
+                "name": name,
                 "description": media.description,
                 "thumbnail_url": media.thumbnail_url,
                 "length": media.length,

diff --git a/docs/README.md b/docs/README.md
@@ -15,7 +15,7 @@ Make install-be
 
 ### Start the documentation server
 ```bash
-mkdocs serve
+mkdocs serve -w ./backend
 ```
 
 

diff --git a/docs/assets/favicon.png b/docs/assets/favicon.png
diff --git a/docs/assets/logo.png b/docs/assets/logo.png
diff --git a/docs/concepts/overview.md b/docs/concepts/overview.md
@@ -1,12 +1,39 @@
-# Reasoning Engine
+## Reasoning Engine
 
-The Reasoning Engine is the core of the system. It is responsible for processing the input data and generating the output data. The Reasoning Engine is a collection of modules that work together to perform the reasoning process. Each module is responsible for a specific task, such as data processing, rule evaluation, or output generation.
+The Reasoning Engine is the core component that directly interfaces with the user. It interprets natural language input in any conversation and orchestrates agents to fulfill the user's requests. The primary functions of the Reasoning Engine are:
 
+* Maintain Context of Conversational History: Manage memory, context limits, input, and output experiences to ensure coherent and context-aware interactions.
+* Natural Language Understanding (NLU): Uses LLMs of your choice to understand the task.
+* Intelligent Reference Deduction: Intelligently deduce references to previous messages, outputs, files, agents, etc., to provide relevant and accurate responses.
+* Agent Orchestration: Decide on agents and their workflows to fulfill requests. Multiple strategies can be employed to create agent workflows, such as step-by-step processes or chaining of agents provided by default.
+* Final Control Over Conversation Flow: Maintain ultimate control over the flow of conversation with the user, ensuring coherence and goal alignment.
 
-# Agents
 
-Agents are the core building blocks of the Reasoning Engine. They are responsible for processing the input data and generating the output data. Agents are designed to be modular and extensible, allowing developers to easily add new functionality to the system. Each agent is responsible for a specific task, such as data processing, rule evaluation, or output generation.
+## Agents
 
-# Tools
+An Agent is an autonomous entity that performs specific tasks using available tools. Agents define the user experience and are unique in their own way. Some agents can make the conversation fun while accomplishing tasks, similar to your favorite barista. Others might provide user experiences like a video player, display images, collections of images, or engage in text-based chat. Agents can also have personalities. We plan to add multiple agents for the same tasks but with a variety of user experiences.
+
+
+
+For example, the task "Give me a summary of this video" can be accomplished by choosing one of the summary agents:
+
+* "PromptSummarizer": This agent asks you for prompts that can be used for generating a summary. You have control and freedom over the style in each interaction.
+* "SceneSummarizer": This agent uses scene descriptions, audio, etc., to generate a summary in a specific format using its internal prompt.
+
+
+
+Key aspects of Agents include:
+
+* Task Autonomy: Agents perform tasks independently, utilizing tools to achieve their objectives.
+* Unique User Experiences (UX): Each agent offers a distinct user experience, enhancing engagement and satisfaction. Multiple agents for the same task offer personalized interactions and cater to different user preferences like loading a specific UI or just a text message.
+* Standardized Agent Interface: Agents communicate with the Reasoning Engine through a common API or protocol, ensuring consistent integration and interaction.
+
+## Tools
+
+Tools are functional building blocks that can be created from any library and used within agents. They are the functions that enable agents to perform their tasks. For example, we have created an upload tool that is a wrapper around the videodb upload function; another is an index function with parameters.
+
+Key aspects of Tools include:
+
+* Functional Building Blocks: Serve as modular functions that agents can utilize to perform tasks efficiently.
+* Wrapper Functions: Act as wrappers for existing functions or libraries, enhancing modularity and reusability.
 
-Tools are the core building blocks of the Agents. They are used to extend the capabilities of the agents. Tools are designed to be modular and extensible, allowing developers to easily add new functionality to the system. Each tool is responsible for a specific task, such as data processing, rule evaluation, or output generation.
diff --git a/docs/core/reasoning.md b/docs/core/reasoning.md
@@ -1,10 +1,4 @@
-## Reasoning
-
-The "Reasoning" component of the Video Agent system comprises the ReasoningEngine and its configuration model, ReasoningEngineConfig. These core elements are designed to analyze and process input messages by utilizing a configurable set of language models. This facilitates advanced decision-making and response generation tailored to the context of video sessions. The configuration model allows precise control over operational parameters such as the number of iterations, system prompts, and integration with Langfuse for detailed operational tracing, enabling the system to adapt effectively to various interaction scenarios.
-
-### Reasoning Engine
+## Reasoning Engine
 
 
 ::: spielberg.core.reasoning.ReasoningEngine
-
-
diff --git a/docs/core/session.md b/docs/core/session.md
@@ -1,5 +1,10 @@
 ## Session
 
+
+### BaseMessage
+
+::: spielberg.core.session.BaseMessage
+
 ### InputMessage
 
 ::: spielberg.core.session.InputMessage

diff --git a/docs/get_started/install.md b/docs/get_started/install.md
@@ -1,39 +1,86 @@
 # Getting Started
 
-* Clone the repository:
+### Prerequisites
 
-```console
+- Python 3.9 or higher
+- Node.js 22.8.0 or higher
+- npm
+
+### Installation
+
+1. Clone the repository:
+
+``` bash
 git clone https://github.com/video-db/Spielberg.git
 cd Spielberg
 ```
 
-* Create the .env file and set the environment variables:
+2. Set up the environment:
 
-```console
-cp .env.example .env
+```bash
+./setup.sh
 ```
 
-* Use virtualenv as:
+This script will:
+- Install nvm (Node Version Manager) if not already installed
+- Install Node.js 22.8.0 using nvm
+- Install Python and pip
+- Set up virtual environments for both frontend and backend
+- Install dependencies for both frontend and backend
+
+Supported platforms:
+- Mac
+- Linux
+
+3. Configure the environment variables:
 
-```console
-python3 -m venv .venv
-source .venv/bin/activate
+```bash
+cp backend/.env.example backend/.env
+cp frontend/.env.example frontend/.env
 ```
 
-* Init the database
+Edit the `.env` files to add your API keys and other configuration options.
 
-```console
+[TODO]: Add all supported variables or point to documentation where we have given the list.
+
+4. Initialize and configure the database
+
+For SQLite (default):
+```bash
 make init-sqlite-db
 ```
 
-* Install the dependencies:
+This command will initialize the SQLite DB file in the `backend` directory. No additional configuration is required for SQLite.
 
-```console
-make install
-```
+For other databases, follow the database configuration documentation (TODO: add link).
+
+
+## Project Structure
 
-* Start the server:
+- `backend/`: Contains the Flask backend application
+- `frontend/`: Contains the Vue 3 frontend application
+- `docs/`: Project documentation
+- `infra/`: Infrastructure-related files
 
-```console
+
+## Running the Application
+
+To start both the backend and frontend servers:
+
+```bash
 make run
 ```
+
+This will start the backend server on `http://127.0.0.1:8000` and the frontend server on `http://127.0.0.1:8080`.
+
+To run only the backend server:
+
+```bash
+make run-be
+```
+
+To run only the frontend development server:
+
+```bash
+make run-fe
+```
diff --git a/docs/index.md b/docs/index.md
@@ -1,5 +1,3 @@
 # Welcome to Spielberg
 
 The Spielberg project is an advanced video processing and analysis platform that utilizes a range of AI agents and language models to handle diverse video management needs and tasks. It features a modular architecture that supports easy expansion and integration of new functionalities. Core components include specialized agents for distinct processing tasks, multiple language models for natural language processing, and a flexible database interface for data storage and retrieval. The project emphasizes ease of installation and setup through a streamlined Makefile, catering to developers looking to deploy or extend its capabilities efficiently.
-
-## Features
diff --git a/docs/overrides/main.html b/docs/overrides/main.html
@@ -1,8 +1,18 @@
 {% extends "base.html" %}
 
 {% block announce %}
-<strong>Video Agents</strong> is in open beta. Come join our
+<strong>Spielberg</strong> is in open beta. Come join our
 <a href="https://discord.com/invite/py9P639jGz">
     Discord community
 </a>. Feedback and questions are welcome! 🚀
 {% endblock %}
+
+{% block htmltitle %}
+{% if page.meta and page.meta.title %}
+<title>{{ page.meta.title }}</title>
+{% elif page.title and not page.is_homepage %}
+<title>{{ page.title | striptags }}</title>
+{% else %}
+<title>{{ config.site_name }}</title>
+{% endif %}
+{% endblock %}
-Original file line number
+Diff line change
@@ Expand Up / @@ -15,7 +15,7 @@ Make install-be @@
     ### Start the documentation server
     ```bash
-    mkdocs serve
+    mkdocs serve -w ./backend
     ```
@@ Expand Down @@