Skip to content

Commit

Permalink
Merge branch 'main' into ankit/refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
ankit-v2-3 authored Oct 23, 2024
2 parents cbf4892 + d977dc1 commit 9150c03
Show file tree
Hide file tree
Showing 18 changed files with 245 additions and 58 deletions.
17 changes: 14 additions & 3 deletions backend/spielberg/agents/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@
"type": "string",
"description": "URL to upload the content",
},
"name": {
"type": "string",
"description": "Name of the content to upload",
},
"media_type": {
"type": "string",
"enum": ["video", "audio", "image"],
Expand All @@ -42,7 +46,7 @@ def __init__(self, session: Session, **kwargs):
self.parameters = UPLOAD_AGENT_PARAMETERS
super().__init__(session=session, **kwargs)

def _upload(self, url: str, media_type: str):
def _upload(self, url: str, media_type: str, name: str):
"""Upload the media with the given URL."""
try:
if media_type == "video":
Expand All @@ -57,7 +61,7 @@ def _upload(self, url: str, media_type: str):
content.status_message = f"Uploading {media_type}..."
self.output_message.push_update()

upload_data = self.videodb_tool.upload(url, media_type)
upload_data = self.videodb_tool.upload(url, media_type, name=name)

content.status_message = f"{upload_data['name']} uploaded successfully"
if media_type == "video":
Expand Down Expand Up @@ -119,7 +123,13 @@ def _upload_yt_playlist(self, playlist_info: dict, media_type):
)

def run(
self, url: str, media_type="video", collection_id: str = None, *args, **kwargs
self,
url: str,
media_type="video",
collection_id: str = None,
name: str = None,
*args,
**kwargs,
) -> AgentResponse:
"""
Upload the media with the given URL.
Expand All @@ -146,3 +156,4 @@ def run(

# upload the media
return self._upload(url, media_type)

19 changes: 12 additions & 7 deletions backend/spielberg/core/reasoning.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,13 @@


class ReasoningEngine:
"""The ReasoningEngine class."""
"""The Reasoning Engine is the core class that directly interfaces with the user. It interprets natural language input in any conversation and orchestrates agents to fulfill the user's requests. The primary functions of the Reasoning Engine are:
* Maintain Context of Conversational History: Manage memory, context limits, input, and output experiences to ensure coherent and context-aware interactions.
* Natural Language Understanding (NLU): Uses LLMs of your choice to have understanding of the task.
* Intelligent Reference Deduction: Intelligently deduce references to previous messages, outputs, files, agents, etc., to provide relevant and accurate responses.
* Agent Orchestration: Decide on agents and their workflows to fulfill requests. Multiple strategies can be employed to create agent workflows, such as step-by-step processes or chaining of agents provided by default.
* Final Control Over Conversation Flow: Maintain ultimate control over the flow of conversation with the user, ensuring coherence and goal alignment."""

def __init__(
self,
Expand All @@ -46,8 +52,8 @@ def __init__(
):
"""Initialize the ReasoningEngine.
:param InputMessage input_message: The input message to the reasoning engine
:param Session session: The session instance
:param input_message: The input message to the reasoning engine.
:param session: The session instance.
"""
self.input_message = input_message
self.session = session
Expand All @@ -61,7 +67,7 @@ def __init__(
def register_agents(self, agents: List[BaseAgent]):
"""Register an agents.
:param List[BaseAgent] agents: The list of agents to register
:param agents: The list of agents to register.
"""
self.agents.extend(agents)

Expand Down Expand Up @@ -110,8 +116,7 @@ def run_agent(self, agent_name: str, *args, **kwargs) -> AgentResponse:
:param str agent_name: The name of the agent to run
:param args: The arguments to pass to the agent
:param kwargs: The keyword arguments to pass to the agent
:return: :class:`AgentResponse` instance
:rtype: AgentResponse
:return: The response from the agent
"""
print("-" * 40, f"Running {agent_name} Agent", "-" * 40)
print(kwargs, "\n\n")
Expand Down Expand Up @@ -208,7 +213,7 @@ def step(self):
def run(self, max_iterations: int = None):
"""Run the reasoning engine.
:param int max_iterations: (optional) The number of max_iterations to run the reasoning engine
:param int max_iterations: The number of max_iterations to run the reasoning engine
"""
self.iterations = max_iterations or self.max_iterations
self.build_context()
Expand Down
8 changes: 6 additions & 2 deletions backend/spielberg/core/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ class BaseMessage(BaseModel):
:param str session_id: Session is of the messages
:param str conv_id: Conversation id
:param int msg_id: (optional) Message id
:param MsgType msg_type: (optional) :class:`MsgType` of the message
:param msg_type: Type of the message
"""

model_config = ConfigDict(
Expand All @@ -126,7 +126,11 @@ class BaseMessage(BaseModel):


class InputMessage(BaseMessage):
"""Input message to the agent"""
"""Input message to the agent
:param BaseDB db: Database instance
:param MsgType msg_type: :class:`MsgType` of the message
"""

db: BaseDB
msg_type: MsgType = MsgType.input
Expand Down
10 changes: 7 additions & 3 deletions backend/spielberg/tools/videodb_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,16 +64,20 @@ def get_videos(self):
for video in videos
]

def upload(self, url, media_type):
media = self.conn.upload(url=url, media_type=media_type)
def upload(self, url, media_type, name=None):
if name is None:
media = self.conn.upload(url=url, media_type=media_type)
name = media.name
else:
media = self.conn.upload(url=url, media_type=media_type, name=name)

if media_type == "video":
return {
"id": media.id,
"collection_id": media.collection_id,
"stream_url": media.stream_url,
"player_url": media.player_url,
"name": media.name,
"name": name,
"description": media.description,
"thumbnail_url": media.thumbnail_url,
"length": media.length,
Expand Down
2 changes: 1 addition & 1 deletion docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ Make install-be

### Start the documentation server
```bash
mkdocs serve
mkdocs serve -w ./backend
```


Expand Down
Binary file modified docs/assets/favicon.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/assets/logo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
39 changes: 33 additions & 6 deletions docs/concepts/overview.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,39 @@
# Reasoning Engine
## Reasoning Engine

The Reasoning Engine is the core of the system. It is responsible for processing the input data and generating the output data. The Reasoning Engine is a collection of modules that work together to perform the reasoning process. Each module is responsible for a specific task, such as data processing, rule evaluation, or output generation.
The Reasoning Engine is the core component that directly interfaces with the user. It interprets natural language input in any conversation and orchestrates agents to fulfill the user's requests. The primary functions of the Reasoning Engine are:

* Maintain Context of Conversational History: Manage memory, context limits, input, and output experiences to ensure coherent and context-aware interactions.
* Natural Language Understanding (NLU): Uses LLMs of your choice to understand the task.
* Intelligent Reference Deduction: Intelligently deduce references to previous messages, outputs, files, agents, etc., to provide relevant and accurate responses.
* Agent Orchestration: Decide on agents and their workflows to fulfill requests. Multiple strategies can be employed to create agent workflows, such as step-by-step processes or chaining of agents provided by default.
* Final Control Over Conversation Flow: Maintain ultimate control over the flow of conversation with the user, ensuring coherence and goal alignment.

# Agents

Agents are the core building blocks of the Reasoning Engine. They are responsible for processing the input data and generating the output data. Agents are designed to be modular and extensible, allowing developers to easily add new functionality to the system. Each agent is responsible for a specific task, such as data processing, rule evaluation, or output generation.
## Agents

# Tools
An Agent is an autonomous entity that performs specific tasks using available tools. Agents define the user experience and are unique in their own way. Some agents can make the conversation fun while accomplishing tasks, similar to your favorite barista. Others might provide user experiences like a video player, display images, collections of images, or engage in text-based chat. Agents can also have personalities. We plan to add multiple agents for the same tasks but with a variety of user experiences.



For example, the task "Give me a summary of this video" can be accomplished by choosing one of the summary agents:

* "PromptSummarizer": This agent asks you for prompts that can be used for generating a summary. You have control and freedom over the style in each interaction.
* "SceneSummarizer": This agent uses scene descriptions, audio, etc., to generate a summary in a specific format using its internal prompt.



Key aspects of Agents include:

* Task Autonomy: Agents perform tasks independently, utilizing tools to achieve their objectives.
* Unique User Experiences (UX): Each agent offers a distinct user experience, enhancing engagement and satisfaction. Multiple agents for the same task offer personalized interactions and cater to different user preferences, such as loading a specific UI or just sending a text message.
* Standardized Agent Interface: Agents communicate with the Reasoning Engine through a common API or protocol, ensuring consistent integration and interaction.

## Tools

Tools are functional building blocks that can be created from any library and used within agents. They are the functions that enable agents to perform their tasks. For example, we have created an upload tool that is a wrapper around the videodb upload function; another is an index function with parameters.

Key aspects of Tools include:

* Functional Building Blocks: Serve as modular functions that agents can utilize to perform tasks efficiently.
* Wrapper Functions: Act as wrappers for existing functions or libraries, enhancing modularity and reusability.

Tools are the core building blocks of the Agents. They are used to extend the capabilities of the agents. Tools are designed to be modular and extensible, allowing developers to easily add new functionality to the system. Each tool is responsible for a specific task, such as data processing, rule evaluation, or output generation.
8 changes: 1 addition & 7 deletions docs/core/reasoning.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,4 @@
## Reasoning

The "Reasoning" component of the Video Agent system comprises the ReasoningEngine and its configuration model, ReasoningEngineConfig. These core elements are designed to analyze and process input messages by utilizing a configurable set of language models. This facilitates advanced decision-making and response generation tailored to the context of video sessions. The configuration model allows precise control over operational parameters such as the number of iterations, system prompts, and integration with Langfuse for detailed operational tracing, enabling the system to adapt effectively to various interaction scenarios.

### Reasoning Engine
## Reasoning Engine


::: spielberg.core.reasoning.ReasoningEngine


5 changes: 5 additions & 0 deletions docs/core/session.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
## Session


### BaseMessage

::: spielberg.core.session.BaseMessage

### InputMessage

::: spielberg.core.session.InputMessage
Expand Down
81 changes: 64 additions & 17 deletions docs/get_started/install.md
Original file line number Diff line number Diff line change
@@ -1,39 +1,86 @@
# Getting Started

* Clone the repository:
### Prerequisites

```console
- Python 3.9 or higher
- Node.js 22.8.0 or higher
- npm

### Installation

1. Clone the repository:

```bash
git clone https://github.com/video-db/Spielberg.git
cd Spielberg
```

* Create the .env file and set the environment variables:
2. Set up the environment:

```console
cp .env.example .env
```bash
./setup.sh
```

* Use virtualenv as:
This script will:
- Install nvm (Node Version Manager) if not already installed
- Install Node.js 22.8.0 using nvm
- Install Python and pip
- Set up virtual environments for both frontend and backend
- Install dependencies for both frontend and backend

Supported platforms:
- Mac
- Linux

3. Configure the environment variables:

```console
python3 -m venv .venv
source .venv/bin/activate
```bash
cp backend/.env.example backend/.env
cp frontend/.env.example frontend/.env
```

* Init the database
Edit the `.env` files to add your API keys and other configuration options.

```console
[TODO]: Add all supported variables or point to documentation where we have given the list.

4. Initialize and configure the database

For SQLite (default):
```bash
make init-sqlite-db
```

* Install the dependencies:
This command will initialize the SQLite DB file in the `backend` directory. No additional configuration is required for SQLite.

```console
make install
```
For other databases, follow the database configuration documentation (TODO: add link to database configuration docs).


## Project Structure

* Start the server:
- `backend/`: Contains the Flask backend application
- `frontend/`: Contains the Vue 3 frontend application
- `docs/`: Project documentation
- `infra/`: Infrastructure-related files

```console

## Running the Application

To start both the backend and frontend servers:

```bash
make run
```

This will start the backend server on `http://127.0.0.1:8000` and the frontend server on `http://127.0.0.1:8080`.

To run only the backend server:

```bash
make run-be
```

To run only the frontend development server:

```bash
make run-fe
```
2 changes: 0 additions & 2 deletions docs/index.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# Welcome to Spielberg

The Spielberg project is an advanced video processing and analysis platform that utilizes a range of AI agents and language models to handle diverse video management needs and tasks. It features a modular architecture that supports easy expansion and integration of new functionalities. Core components include specialized agents for distinct processing tasks, multiple language models for natural language processing, and a flexible database interface for data storage and retrieval. The project emphasizes ease of installation and setup through a streamlined Makefile, catering to developers looking to deploy or extend its capabilities efficiently.

## Features
12 changes: 11 additions & 1 deletion docs/overrides/main.html
Original file line number Diff line number Diff line change
@@ -1,8 +1,18 @@
{% extends "base.html" %}

{% block announce %}
<strong>Video Agents</strong> is in open beta. Come join our
<strong>Spielberg</strong> is in open beta. Come join our
<a href="https://discord.com/invite/py9P639jGz">
Discord community
</a>. Feedback and questions are welcome! 🚀
{% endblock %}

{% block htmltitle %}
{% if page.meta and page.meta.title %}
<title>{{ page.meta.title }}</title>
{% elif page.title and not page.is_homepage %}
<title>{{ page.title | striptags }}</title>
{% else %}
<title>{{ config.site_name }}</title>
{% endif %}
{% endblock %}
Loading

0 comments on commit 9150c03

Please sign in to comment.