Retriever and lvm update for multimodal rag on videos (#606)
* updates

Signed-off-by: Tiep Le <tiep.le@intel.com>

* cosmetic

Signed-off-by: siddhivelankar23 <siddhi.velankar@intel.com>

* update redis schema

Signed-off-by: siddhivelankar23 <siddhi.velankar@intel.com>

* update multimodal config and docker compose retriever

Signed-off-by: siddhivelankar23 <siddhi.velankar@intel.com>

* update requirements

Signed-off-by: siddhivelankar23 <siddhi.velankar@intel.com>

* update retriever redis

Signed-off-by: siddhivelankar23 <siddhi.velankar@intel.com>

* multimodal retriever implementation

Signed-off-by: siddhivelankar23 <siddhi.velankar@intel.com>

* test for multimodal retriever

Signed-off-by: siddhivelankar23 <siddhi.velankar@intel.com>

* include prompt preparation for multimodal rag on videos application

Signed-off-by: sjagtap1803 <siddhant.jagtap@intel.com>

* fix template

Signed-off-by: sjagtap1803 <siddhant.jagtap@intel.com>

* add test for llava for mm_rag_on_videos

Signed-off-by: sjagtap1803 <siddhant.jagtap@intel.com>

* update test

Signed-off-by: sjagtap1803 <siddhant.jagtap@intel.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fix index not found

Signed-off-by: sjagtap1803 <siddhant.jagtap@intel.com>

* add LVMSearchedMultimodalDoc

Signed-off-by: sjagtap1803 <siddhant.jagtap@intel.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* remove INDEX_SCHEMA

Signed-off-by: siddhivelankar23 <siddhi.velankar@intel.com>

* revise folder structure to comps/retrievers/langchain/redis_multimodal

Signed-off-by: siddhivelankar23 <siddhi.velankar@intel.com>

* update test

Signed-off-by: siddhivelankar23 <siddhi.velankar@intel.com>

* change port of redis to resolve CI test

Signed-off-by: siddhivelankar23 <siddhi.velankar@intel.com>

* update test

Signed-off-by: siddhivelankar23 <siddhi.velankar@intel.com>

* update lvms test

Signed-off-by: siddhivelankar23 <siddhi.velankar@intel.com>

---------

Signed-off-by: Tiep Le <tiep.le@intel.com>
Signed-off-by: siddhivelankar23 <siddhi.velankar@intel.com>
Signed-off-by: sjagtap1803 <siddhant.jagtap@intel.com>
Co-authored-by: siddhivelankar23 <siddhi.velankar@intel.com>
Co-authored-by: sjagtap1803 <siddhant.jagtap@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
4 people authored Sep 6, 2024
1 parent 90cc44f commit 1513998
Showing 15 changed files with 525 additions and 8 deletions.
1 change: 1 addition & 0 deletions comps/__init__.py
@@ -13,6 +13,7 @@
    LLMParamsDoc,
    SearchedDoc,
    SearchedMultimodalDoc,
    LVMSearchedMultimodalDoc,
    RerankedDoc,
    TextDoc,
    RAGASParams,
18 changes: 18 additions & 0 deletions comps/cores/proto/docarray.py
@@ -107,6 +107,24 @@ class SearchedMultimodalDoc(SearchedDoc):
    metadata: List[Dict[str, Any]]


class LVMSearchedMultimodalDoc(SearchedMultimodalDoc):
    max_new_tokens: conint(ge=0, le=1024) = 512
    top_k: int = 10
    top_p: float = 0.95
    typical_p: float = 0.95
    temperature: float = 0.01
    streaming: bool = False
    repetition_penalty: float = 1.03
    chat_template: Optional[str] = Field(
        default=None,
        description=(
            "A template to use for this conversion. "
            "If this is not passed, the model's default chat template will be "
            "used instead. We recommend that the template contains {context} and {question} for multimodal-rag on videos."
        ),
    )


class GeneratedDoc(BaseDoc):
    text: str
    prompt: str
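For reference, a minimal sketch of constructing the new document type in Python. The values are illustrative, the inherited `retrieved_docs` and `initial_query` fields are assumed from `SearchedDoc`, and the metadata keys mirror what the LVM service below reads (`b64_img_str`, `transcript_for_inference`):

```python
from comps import LVMSearchedMultimodalDoc, TextDoc

# Illustrative construction; field values are placeholders, not taken from the commit.
doc = LVMSearchedMultimodalDoc(
    retrieved_docs=[TextDoc(text="transcript chunk retrieved from the vector store")],
    initial_query="What tool does the person use in the video?",
    metadata=[
        {
            "b64_img_str": "<base64-encoded keyframe>",
            "transcript_for_inference": "First, loosen the chain with a chain breaker tool.",
        }
    ],
    # Recommended to contain {context} and {question}, per the field description above.
    chat_template="The transcript associated with the image is '{context}'. {question}",
    max_new_tokens=128,
)
```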
39 changes: 32 additions & 7 deletions comps/lvms/lvm.py
@@ -5,12 +5,16 @@
 import json
 import os
 import time
+from typing import Union

 import requests
+from langchain_core.prompts import PromptTemplate
+from template import ChatTemplate

 from comps import (
     CustomLogger,
     LVMDoc,
+    LVMSearchedMultimodalDoc,
     ServiceType,
     TextDoc,
     opea_microservices,
@@ -29,20 +33,41 @@
     endpoint="/v1/lvm",
     host="0.0.0.0",
     port=9399,
     input_datatype=LVMDoc,
     output_datatype=TextDoc,
 )
 @register_statistics(names=["opea_service@lvm"])
-async def lvm(request: LVMDoc):
+async def lvm(request: Union[LVMDoc, LVMSearchedMultimodalDoc]) -> TextDoc:
     if logflag:
         logger.info(request)
     start = time.time()
-    img_b64_str = request.image
-    prompt = request.prompt
-    max_new_tokens = request.max_new_tokens
+    if isinstance(request, LVMSearchedMultimodalDoc):
+        if logflag:
+            logger.info("[LVMSearchedMultimodalDoc ] input from retriever microservice")
+        retrieved_metadatas = request.metadata
+        img_b64_str = retrieved_metadatas[0]["b64_img_str"]
+        initial_query = request.initial_query
+        context = retrieved_metadatas[0]["transcript_for_inference"]
+        prompt = initial_query
+        if request.chat_template is None:
+            prompt = ChatTemplate.generate_multimodal_rag_on_videos_prompt(initial_query, context)
+        else:
+            prompt_template = PromptTemplate.from_template(request.chat_template)
+            input_variables = prompt_template.input_variables
+            if sorted(input_variables) == ["context", "question"]:
+                prompt = prompt_template.format(question=initial_query, context=context)
+            else:
+                logger.info(
+                    f"[ LVMSearchedMultimodalDoc ] {prompt_template} not used, we only support 2 input variables ['question', 'context']"
+                )
+        max_new_tokens = request.max_new_tokens
+        if logflag:
+            logger.info(f"prompt generated for [LVMSearchedMultimodalDoc ] input from retriever microservice: {prompt}")
+
+        inputs = {"img_b64_str": img_b64_str, "prompt": prompt, "max_new_tokens": max_new_tokens}
+    else:
+        img_b64_str = request.image
+        prompt = request.prompt
+        max_new_tokens = request.max_new_tokens

     inputs = {"img_b64_str": img_b64_str, "prompt": prompt, "max_new_tokens": max_new_tokens}
     # forward to the LLaVA server
     response = requests.post(url=f"{lvm_endpoint}/generate", data=json.dumps(inputs), proxies={"http": None})
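A minimal sketch of exercising the new code path over HTTP, assuming the LVM microservice runs locally on the port registered above (9399). The payload keys mirror the fields `lvm.py` reads from an `LVMSearchedMultimodalDoc`; `retrieved_docs` and `initial_query` are assumed to come from `SearchedDoc`:

```python
import requests

# Placeholder values; a real request carries an actual base64 keyframe and transcript.
payload = {
    "retrieved_docs": [],
    "initial_query": "What is the speaker presenting?",
    "metadata": [
        {
            "b64_img_str": "<base64-encoded keyframe>",
            "transcript_for_inference": "The speaker walks through the quarterly results.",
        }
    ],
    "max_new_tokens": 128,
}

response = requests.post("http://localhost:9399/v1/lvm", json=payload, proxies={"http": None})
print(response.json())
```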
1 change: 1 addition & 0 deletions comps/lvms/requirements.txt
@@ -2,6 +2,7 @@ datasets
docarray[full]
fastapi
huggingface_hub
langchain-core
opentelemetry-api
opentelemetry-exporter-otlp
opentelemetry-sdk
10 changes: 10 additions & 0 deletions comps/lvms/template.py
@@ -0,0 +1,10 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0


class ChatTemplate:

    @staticmethod
    def generate_multimodal_rag_on_videos_prompt(question: str, context: str):
        template = """The transcript associated with the image is '{context}'. {question}"""
        return template.format(context=context, question=question)
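As a quick illustration (the values are made up), the helper above fills the fixed template with the retrieved transcript and the user question:

```python
from template import ChatTemplate

prompt = ChatTemplate.generate_multimodal_rag_on_videos_prompt(
    question="What is the person holding?",
    context="The person holds up a prototype of the new device.",
)
# prompt == "The transcript associated with the image is 'The person holds up a
# prototype of the new device.'. What is the person holding?"
```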
123 changes: 123 additions & 0 deletions comps/retrievers/langchain/redis_multimodal/README.md
@@ -0,0 +1,123 @@
# Retriever Microservice

This retriever microservice is a highly efficient search service designed for handling and retrieving embedding vectors from multimodal data. It operates by receiving an embedding vector as input and conducting a similarity search against vectors stored in a VectorDB database. Users must specify the VectorDB's URL and the index name, and the service searches within that index to find documents with the highest similarity to the input vector.

The service primarily utilizes similarity measures in vector space to rapidly retrieve semantically similar documents. The vector-based retrieval approach is particularly suited for handling large datasets, offering fast and accurate search results that significantly enhance the efficiency and quality of information retrieval.

Overall, this microservice provides robust backend support for applications requiring efficient similarity searches, playing a vital role in scenarios such as recommendation systems, information retrieval, or any other context where precise measurement of document similarity is crucial.

## 🚀1. Start Microservice with Python (Option 1)

To start the retriever microservice, you must first install the required Python packages.

### 1.1 Install Requirements

```bash
pip install -r requirements.txt
```

### 1.2 Setup VectorDB Service

You need to set up your own VectorDB service (Redis in this example) and ingest your knowledge documents into the vector database.

For Redis, you can start a Docker container using the following command. Remember to ingest data into it manually.

```bash
docker run -d --name="redis-vector-db" -p 6379:6379 -p 8001:8001 redis/redis-stack:7.2.0-v9
```

### 1.3 Ingest Images or Videos

Upload a video or images using the dataprep microservice; instructions can be found [here](https://github.com/opea-project/GenAIComps/tree/main/comps/dataprep/redis/multimodal_langchain/README.md).

### 1.4 Start Retriever Service

```bash
python retriever_redis.py
```

## 🚀2. Start Microservice with Docker (Option 2)

### 2.1 Setup Environment Variables

```bash
export your_ip=$(hostname -I | awk '{print $1}')
export REDIS_URL="redis://${your_ip}:6379"
export INDEX_NAME=${your_index_name}
```

### 2.2 Build Docker Image

```bash
cd ../../../../
docker build -t opea/multimodal-retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/langchain/redis_multimodal/docker/Dockerfile .
```

To start a docker container, you have two options:

- A. Run Docker with CLI
- B. Run Docker with Docker Compose

You can choose one as needed.

### 2.3 Run Docker with CLI (Option A)

```bash
docker run -d --name="multimodal-retriever-redis-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME opea/multimodal-retriever-redis:latest
```

### 2.4 Run Docker with Docker Compose (Option B)

```bash
cd docker
docker compose -f docker_compose_retriever.yaml up -d
```

## 🚀3. Consume Retriever Service

### 3.1 Consume Retriever Service

To consume the Retriever Microservice, you can generate a mock embedding vector of length 512 with Python.

```bash
your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)")
curl http://${your_ip}:7000/v1/multimodal_retrieval \
-X POST \
-d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding}}" \
-H 'Content-Type: application/json'
```

You can also set search parameters for the retriever.

```bash
your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)")
curl http://localhost:7000/v1/multimodal_retrieval \
-X POST \
-d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding},\"search_type\":\"similarity\", \"k\":4}" \
-H 'Content-Type: application/json'
```

```bash
your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)")
curl http://localhost:7000/v1/multimodal_retrieval \
-X POST \
-d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding},\"search_type\":\"similarity_distance_threshold\", \"k\":4, \"distance_threshold\":1.0}" \
-H 'Content-Type: application/json'
```

```bash
your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)")
curl http://localhost:7000/v1/multimodal_retrieval \
-X POST \
-d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding},\"search_type\":\"similarity_score_threshold\", \"k\":4, \"score_threshold\":0.2}" \
-H 'Content-Type: application/json'
```

```bash
your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)")
curl http://localhost:7000/v1/multimodal_retrieval \
-X POST \
-d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding},\"search_type\":\"mmr\", \"k\":4, \"fetch_k\":20, \"lambda_mult\":0.5}" \
-H 'Content-Type: application/json'
```
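The same request can also be issued from Python instead of curl; a small sketch assuming the service is reachable at localhost:7000 as configured above:

```python
import random

import requests

embedding = [random.uniform(-1, 1) for _ in range(512)]  # mock 512-dim embedding
payload = {
    "text": "What is the revenue of Nike in 2023?",
    "embedding": embedding,
    "search_type": "similarity",
    "k": 4,
}

response = requests.post("http://localhost:7000/v1/multimodal_retrieval", json=payload)
print(response.json())
```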
2 changes: 2 additions & 0 deletions comps/retrievers/langchain/redis_multimodal/__init__.py
@@ -0,0 +1,2 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
29 changes: 29 additions & 0 deletions comps/retrievers/langchain/redis_multimodal/docker/Dockerfile
@@ -0,0 +1,29 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

FROM langchain/langchain:latest

ARG ARCH="cpu"

RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
    libgl1-mesa-glx \
    libjemalloc-dev \
    vim

RUN useradd -m -s /bin/bash user && \
    mkdir -p /home/user && \
    chown -R user /home/user/

COPY comps /home/user/comps

USER user

RUN pip install --no-cache-dir --upgrade pip && \
    if [ ${ARCH} = "cpu" ]; then pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \
    pip install --no-cache-dir -r /home/user/comps/retrievers/langchain/redis_multimodal/requirements.txt

ENV PYTHONPATH=$PYTHONPATH:/home/user

WORKDIR /home/user/comps/retrievers/langchain/redis_multimodal

ENTRYPOINT ["python", "retriever_redis.py"]
23 changes: 23 additions & 0 deletions comps/retrievers/langchain/redis_multimodal/docker/docker_compose_retriever.yaml
@@ -0,0 +1,23 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "1.0"

services:
  retriever:
    image: opea/multimodal-retriever-redis:latest
    container_name: multimodal-retriever-redis-server
    ports:
      - "7000:7000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}
    restart: unless-stopped

networks:
  default:
    driver: bridge
77 changes: 77 additions & 0 deletions comps/retrievers/langchain/redis_multimodal/multimodal_config.py
@@ -0,0 +1,77 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import os

current_file_path = os.path.abspath(__file__)
parent_dir = os.path.dirname(current_file_path)


def get_boolean_env_var(var_name, default_value=False):
    """Retrieve the boolean value of an environment variable.
    Args:
        var_name (str): The name of the environment variable to retrieve.
        default_value (bool): The default value to return if the variable
        is not found.
    Returns:
        bool: The value of the environment variable, interpreted as a boolean.
    """
    true_values = {"true", "1", "t", "y", "yes"}
    false_values = {"false", "0", "f", "n", "no"}

    # Retrieve the environment variable's value
    value = os.getenv(var_name, "").lower()

    # Decide the boolean value based on the content of the string
    if value in true_values:
        return True
    elif value in false_values:
        return False
    else:
        return default_value


# Check for openai API key
# if "OPENAI_API_KEY" not in os.environ:
# raise Exception("Must provide an OPENAI_API_KEY as an env var.")


# Whether or not to enable langchain debugging
DEBUG = get_boolean_env_var("DEBUG", False)
# Set DEBUG env var to "true" if you wish to enable LC debugging module
if DEBUG:
    import langchain

    langchain.debug = True


# Embedding model
EMBED_MODEL = os.getenv("EMBED_MODEL", "BridgeTower/bridgetower-large-itm-mlm-itc")

# Redis Connection Information
REDIS_HOST = os.getenv("REDIS_HOST", "localhost")
REDIS_PORT = int(os.getenv("REDIS_PORT", 6379))


def format_redis_conn_from_env():
    redis_url = os.getenv("REDIS_URL", None)
    if redis_url:
        return redis_url
    else:
        using_ssl = get_boolean_env_var("REDIS_SSL", False)
        start = "rediss://" if using_ssl else "redis://"

        # if using RBAC
        password = os.getenv("REDIS_PASSWORD", None)
        username = os.getenv("REDIS_USERNAME", "default")
        if password is not None:
            start += f"{username}:{password}@"

        return start + f"{REDIS_HOST}:{REDIS_PORT}"


REDIS_URL = format_redis_conn_from_env()

# Vector Index Configuration
INDEX_NAME = os.getenv("INDEX_NAME", "test-index")
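A short sketch of how the connection string is assembled from the environment. The module name and values below are illustrative, and `REDIS_URL`, when set, takes precedence over the individual pieces:

```python
import os

# Hypothetical environment, for illustration only.
os.environ["REDIS_HOST"] = "redis-vector-db"
os.environ["REDIS_PORT"] = "6379"
os.environ["REDIS_PASSWORD"] = "secret"
os.environ["REDIS_SSL"] = "true"

import multimodal_config  # REDIS_HOST, REDIS_PORT, and REDIS_URL are resolved at import time

print(multimodal_config.REDIS_URL)  # rediss://default:secret@redis-vector-db:6379
```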
11 changes: 11 additions & 0 deletions comps/retrievers/langchain/redis_multimodal/requirements.txt
@@ -0,0 +1,11 @@
docarray[full]
fastapi
langchain_community
opentelemetry-api
opentelemetry-exporter-otlp
opentelemetry-sdk
prometheus-fastapi-instrumentator
redis
shortuuid
transformers
uvicorn
