Support llamaindex for retrieval microservice and remove langchain dependency for llm and rerank microservice #152

Merged
30 commits merged on Jul 9, 2024
Commits (30)
5c65750
remove langchain dependency for llm and rerank
lvliang-intel Jun 11, 2024
e7ba102
add llamaindex support for retrieval
lvliang-intel Jun 11, 2024
1eb4e00
fix schema issue
lvliang-intel Jun 12, 2024
d97b422
Merge branch 'main' of https://github.com/lvliang-intel/GenAIComps in…
lvliang-intel Jun 12, 2024
67dae44
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 12, 2024
2bf38e3
fix dockerfile
lvliang-intel Jun 12, 2024
5e60894
Merge branch 'support_llamaindex' of https://github.com/lvliang-intel…
lvliang-intel Jun 12, 2024
e0ca870
update readme
lvliang-intel Jun 12, 2024
9f7e9f3
update readme
Jun 12, 2024
8d0eceb
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 12, 2024
aa67db0
fix entrypoint
lvliang-intel Jun 12, 2024
49a5ad4
Merge branch 'support_llamaindex' of https://github.com/lvliang-intel…
lvliang-intel Jun 12, 2024
8fdeee2
Merge branch 'main' of https://github.com/lvliang-intel/GenAIComps in…
lvliang-intel Jun 13, 2024
ad10610
Merge branch 'main' of https://github.com/lvliang-intel/GenAIComps in…
lvliang-intel Jun 13, 2024
27099b5
Merge branch 'support_llamaindex' of https://github.com/lvliang-intel…
lvliang-intel Jun 13, 2024
fc9ea1b
add dataprep process in test script
lvliang-intel Jun 13, 2024
bf60d7c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 13, 2024
6963efc
fix redis url for dataprep
lvliang-intel Jun 13, 2024
6fb7743
Merge branch 'support_llamaindex' of https://github.com/lvliang-intel…
lvliang-intel Jun 13, 2024
5f9e6ea
Merge branch 'main' of https://github.com/lvliang-intel/GenAIComps in…
lvliang-intel Jun 27, 2024
1fb7f27
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 27, 2024
4810ca9
update readme
lvliang-intel Jun 27, 2024
5bdf972
Merge branch 'support_llamaindex' of https://github.com/lvliang-intel…
lvliang-intel Jun 27, 2024
caff2e4
update code
lvliang-intel Jun 27, 2024
36b8f68
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 27, 2024
6889fba
Merge branch 'support_llamaindex' of https://github.com/lvliang-intel…
lvliang-intel Jun 27, 2024
85d5b29
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 27, 2024
0b93b86
Merge branch 'main' of https://github.com/lvliang-intel/GenAIComps in…
lvliang-intel Jul 5, 2024
2f0956e
Merge branch 'support_llamaindex' of https://github.com/lvliang-intel…
lvliang-intel Jul 5, 2024
1bff6cf
Merge branch 'main' into support_llamaindex
lvliang-intel Jul 9, 2024
44 changes: 27 additions & 17 deletions comps/llms/text-generation/tgi/llm.py
@@ -5,7 +5,7 @@
import time

from fastapi.responses import StreamingResponse
from langchain_community.llms import HuggingFaceEndpoint
from huggingface_hub import AsyncInferenceClient
from langsmith import traceable

from comps import (
@@ -28,26 +28,23 @@
)
@traceable(run_type="llm")
@register_statistics(names=["opea_service@llm_tgi"])
def llm_generate(input: LLMParamsDoc):
async def llm_generate(input: LLMParamsDoc):
stream_gen_time = []
start = time.time()
llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080")
llm = HuggingFaceEndpoint(
endpoint_url=llm_endpoint,
max_new_tokens=input.max_new_tokens,
top_k=input.top_k,
top_p=input.top_p,
typical_p=input.typical_p,
temperature=input.temperature,
repetition_penalty=input.repetition_penalty,
streaming=input.streaming,
timeout=600,
)
if input.streaming:
stream_gen_time = []

async def stream_generator():
chat_response = ""
async for text in llm.astream(input.query):
text_generation = await llm.text_generation(
prompt=input.query,
stream=input.streaming,
max_new_tokens=input.max_new_tokens,
repetition_penalty=input.repetition_penalty,
temperature=input.temperature,
top_k=input.top_k,
top_p=input.top_p,
)
async for text in text_generation:
stream_gen_time.append(time.time() - start)
chat_response += text
chunk_repr = repr(text.encode("utf-8"))
@@ -59,10 +56,23 @@ async def stream_generator():

return StreamingResponse(stream_generator(), media_type="text/event-stream")
else:
response = llm.invoke(input.query)
response = await llm.text_generation(
prompt=input.query,
stream=input.streaming,
max_new_tokens=input.max_new_tokens,
repetition_penalty=input.repetition_penalty,
temperature=input.temperature,
top_k=input.top_k,
top_p=input.top_p,
)
statistics_dict["opea_service@llm_tgi"].append_latency(time.time() - start, None)
return GeneratedDoc(text=response, prompt=input.query)


if __name__ == "__main__":
llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080")
llm = AsyncInferenceClient(
model=llm_endpoint,
timeout=600,
)
opea_microservices["opea_service@llm_tgi"].start()
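
For readers unfamiliar with `huggingface_hub`, here is a minimal standalone sketch of the `AsyncInferenceClient` call pattern this diff adopts in place of LangChain's `HuggingFaceEndpoint`; the endpoint URL, prompt, and generation parameters are illustrative values only.

```python
# Minimal sketch (illustrative values): stream tokens from a TGI endpoint without LangChain.
import asyncio

from huggingface_hub import AsyncInferenceClient

llm = AsyncInferenceClient(model="http://localhost:8080", timeout=600)


async def main() -> None:
    # With stream=True, text_generation returns an async iterator of generated tokens.
    stream = await llm.text_generation(
        prompt="What is Deep Learning?",
        stream=True,
        max_new_tokens=64,
        repetition_penalty=1.03,
        temperature=0.7,
        top_k=10,
        top_p=0.95,
    )
    async for token in stream:
        print(token, end="", flush=True)


asyncio.run(main())
```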
1 change: 0 additions & 1 deletion comps/llms/text-generation/tgi/requirements.txt
@@ -1,7 +1,6 @@
docarray[full]
fastapi
huggingface_hub
langchain==0.1.16
langsmith
opentelemetry-api
opentelemetry-exporter-otlp
1 change: 0 additions & 1 deletion comps/reranks/requirements.txt
@@ -1,6 +1,5 @@
docarray[full]
fastapi
langchain
langsmith
opentelemetry-api
opentelemetry-exporter-otlp
File renamed without changes.
@@ -27,7 +27,7 @@ RUN pip install --no-cache-dir --upgrade pip && \

ENV PYTHONPATH=$PYTHONPATH:/home/user

WORKDIR /home/user/comps/reranks/langchain
WORKDIR /home/user/comps/reranks/tei

ENTRYPOINT ["python", "reranking_tei_xeon.py"]
ENTRYPOINT ["python", "reranking_tei.py"]

@@ -8,7 +8,6 @@
import time

import requests
from langchain_core.prompts import ChatPromptTemplate
from langsmith import traceable

from comps import (
@@ -48,14 +47,23 @@ def reranking(input: SearchedDoc) -> LLMParamsDoc:
context_str = context_str + " " + input.retrieved_docs[best_response["index"]].text
if context_str and len(re.findall("[\u4E00-\u9FFF]", context_str)) / len(context_str) >= 0.3:
# chinese context
template = "仅基于以下背景回答问题:\n{context}\n问题: {question}"
template = """
### 你将扮演一个乐于助人、尊重他人并诚实的助手,你的目标是帮助用户解答问题。有效地利用来自本地知识库的搜索结果。确保你的回答中只包含相关信息。如果你不确定问题的答案,请避免分享不准确的信息。
### 搜索结果:{context}
### 问题:{question}
### 回答:
"""
else:
template = """Answer the question based only on the following context:
{context}
Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)
final_prompt = prompt.format(context=context_str, question=input.initial_query)
template = """
### You are a helpful, respectful and honest assistant to help the user with questions. \
Please refer to the search results obtained from the local knowledge base. \
But be careful to not incorporate the information that you think is not relevant to the question. \
If you don't know the answer to a question, please don't share false information. \
### Search results: {context} \n
### Question: {question} \n
### Answer:
"""
final_prompt = template.format(context=context_str, question=input.initial_query)
statistics_dict["opea_service@reranking_tgi_gaudi"].append_latency(time.time() - start, None)
return LLMParamsDoc(query=final_prompt.strip())
else:
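
With `ChatPromptTemplate` removed, the final prompt is assembled with plain `str.format`; a minimal sketch with placeholder values:

```python
# Minimal sketch (placeholder values): plain str.format replaces langchain_core's ChatPromptTemplate.
template = """
### You are a helpful, respectful and honest assistant to help the user with questions. \
Please refer to the search results obtained from the local knowledge base. \
But be careful to not incorporate the information that you think is not relevant to the question. \
If you don't know the answer to a question, please don't share false information. \
### Search results: {context} \n
### Question: {question} \n
### Answer:
"""

final_prompt = template.format(
    context="<text of the top-ranked retrieved documents>",
    question="What is the revenue of Nike in 2023?",
)
print(final_prompt.strip())
```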
@@ -8,12 +8,12 @@ Overall, this microservice provides robust backend support for applications requ

# Retriever Microservice with Redis

For details, please refer to this [readme](langchain/redis/README.md)
For details, please refer to this [readme](redis/README.md)

# Retriever Microservice with Milvus

For details, please refer to this [readme](langchain/milvus/README.md)
For details, please refer to this [readme](milvus/README.md)

# Retriever Microservice with PGVector

For details, please refer to this [readme](langchain/pgvector/README.md)
For details, please refer to this [readme](pgvector/README.md)
2 changes: 1 addition & 1 deletion comps/retrievers/langchain/pinecone/docker/Dockerfile
@@ -20,7 +20,7 @@ RUN chmod +x /home/user/comps/retrievers/langchain/pinecone/run.sh
USER user

RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /home/user/comps/retrievers/requirements.txt
pip install --no-cache-dir -r /home/user/comps/retrievers/langchain/pinecone/requirements.txt

ENV PYTHONPATH=$PYTHONPATH:/home/user

97 changes: 97 additions & 0 deletions comps/retrievers/llamaindex/README.md
@@ -0,0 +1,97 @@
# Retriever Microservice

This retriever microservice is a highly efficient search service designed for handling and retrieving embedding vectors. It receives an embedding vector as input and performs a similarity search against the vectors stored in a vector database (VectorDB). Users must specify the VectorDB URL and index name, and the service searches that index for the documents most similar to the input vector.

The service relies on similarity measures in vector space to rapidly retrieve contextually similar documents. This vector-based retrieval approach is particularly well suited to large datasets, offering fast and accurate search results that significantly enhance the efficiency and quality of information retrieval.

Overall, this microservice provides robust backend support for applications requiring efficient similarity search, playing a vital role in scenarios such as recommendation systems, information retrieval, or any other context where precise measurement of document similarity is crucial.
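
For intuition, the similarity measure is typically cosine similarity between the query embedding and each stored document embedding. A minimal, illustrative sketch is shown below; the real search is executed inside the vector database, not in Python.

```python
# Illustrative only: cosine similarity between a query embedding and stored embeddings.
import numpy as np

query = np.random.rand(768)            # embedding of the incoming query
documents = np.random.rand(1000, 768)  # embeddings already stored in the VectorDB

scores = documents @ query / (np.linalg.norm(documents, axis=1) * np.linalg.norm(query))
top_k = np.argsort(scores)[::-1][:4]   # indices of the 4 most similar documents
print(top_k, scores[top_k])
```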

# 🚀1. Start Microservice with Python (Option 1)

To start the retriever microservice, you must first install the required Python packages.

## 1.1 Install Requirements

```bash
pip install -r requirements.txt
```

## 1.2 Setup VectorDB Service

You need to set up your own VectorDB service (Redis in this example) and ingest your knowledge documents into the vector database.

For Redis, you can start a Docker container with the following command. Remember to ingest data into it manually.

```bash
docker run -d --name="redis-vector-db" -p 6379:6379 -p 8001:8001 redis/redis-stack:7.2.0-v9
```

Then ingest data into the Redis VectorDB using the methods described in the dataprep microservice; a hypothetical example is sketched below.
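
For example, assuming the dataprep-redis microservice from this repository is running locally (the port and `/v1/dataprep` route below are assumptions, not taken from this PR; check the dataprep README for the exact values), ingestion might look like:

```python
# Hypothetical ingestion call; host, port and route are assumptions.
import requests

with open("your_document.pdf", "rb") as f:
    resp = requests.post(
        "http://localhost:6007/v1/dataprep",  # assumed dataprep endpoint
        files={"files": ("your_document.pdf", f)},
    )
print(resp.status_code, resp.text)
```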

## 1.3 Start Retriever Service

```bash
python retriever_redis.py
```

# 🚀2. Start Microservice with Docker (Option 2)

## 2.1 Setup Environment Variables

```bash
export REDIS_URL="redis://${your_ip}:6379"
export INDEX_NAME=${your_index_name}
export LANGCHAIN_TRACING_V2=true
export LANGCHAIN_API_KEY=${your_langchain_api_key}
export LANGCHAIN_PROJECT="opea/retrievers"
```

## 2.2 Build Docker Image

```bash
cd ../../
docker build -t opea/retriever-redis-llamaindex:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/llamaindex/docker/Dockerfile .
```

To start a docker container, you have two options:

- A. Run Docker with CLI
- B. Run Docker with Docker Compose

You can choose one as needed.

## 2.3 Run Docker with CLI (Option A)

```bash
docker run -d --name="retriever-redis-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME opea/retriever-redis-llamaindex:latest
```

## 2.4 Run Docker with Docker Compose (Option B)

```bash
cd llamaindex/docker
docker compose -f docker_compose_retriever.yaml up -d
```

# 🚀3. Consume Retriever Service

## 3.1 Check Service Status

```bash
curl http://localhost:7000/v1/health_check \
-X GET \
-H 'Content-Type: application/json'
```

## 3.2 Consume Retriever Service

To consume the Retriever Microservice, you can generate a mock embedding vector of length 768 with Python.

```bash
your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
curl http://${your_ip}:7000/v1/retrieval \
-X POST \
-d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding}}" \
-H 'Content-Type: application/json'
```
2 changes: 2 additions & 0 deletions comps/retrievers/llamaindex/__init__.py
@@ -0,0 +1,2 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
27 changes: 27 additions & 0 deletions comps/retrievers/llamaindex/docker/Dockerfile
@@ -0,0 +1,27 @@

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

FROM ubuntu:22.04

RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libgl1-mesa-glx \
libjemalloc-dev \
vim

RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
chown -R user /home/user/

COPY comps /home/user/comps

USER user

RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /home/user/comps/retrievers/llamaindex/requirements.txt

ENV PYTHONPATH=$PYTHONPATH:/home/user

WORKDIR /home/user/comps/retrievers/llamaindex

ENTRYPOINT ["python", "retriever_redis.py"]
@@ -0,0 +1,29 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "3.8"

services:
redis-vector-db:
image: redis/redis-stack:7.2.0-v9
container_name: redis-vector-db
ports:
- "6379:6379"
- "8001:8001"
retriever:
image: opea/retriever-redis:latest
container_name: retriever-redis-server
ports:
- "7000:7000"
ipc: host
environment:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
restart: unless-stopped

networks:
default:
driver: bridge
77 changes: 77 additions & 0 deletions comps/retrievers/llamaindex/redis_config.py
@@ -0,0 +1,77 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import os


def get_boolean_env_var(var_name, default_value=False):
"""Retrieve the boolean value of an environment variable.

Args:
var_name (str): The name of the environment variable to retrieve.
default_value (bool): The default value to return if the variable
is not found.

Returns:
bool: The value of the environment variable, interpreted as a boolean.
"""
true_values = {"true", "1", "t", "y", "yes"}
false_values = {"false", "0", "f", "n", "no"}

# Retrieve the environment variable's value
value = os.getenv(var_name, "").lower()

# Decide the boolean value based on the content of the string
if value in true_values:
return True
elif value in false_values:
return False
else:
return default_value


# Whether or not to enable langchain debugging
DEBUG = get_boolean_env_var("DEBUG", False)
# Set DEBUG env var to "true" if you wish to enable LC debugging module
if DEBUG:
import langchain

langchain.debug = True


# Embedding model
EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5")


# Redis Connection Information
REDIS_HOST = os.getenv("REDIS_HOST", "localhost")
REDIS_PORT = int(os.getenv("REDIS_PORT", 6379))


def format_redis_conn_from_env():
redis_url = os.getenv("REDIS_URL", None)
if redis_url:
return redis_url
else:
using_ssl = get_boolean_env_var("REDIS_SSL", False)
start = "rediss://" if using_ssl else "redis://"

# if using RBAC
password = os.getenv("REDIS_PASSWORD", None)
username = os.getenv("REDIS_USERNAME", "default")
if password is not None:
start += f"{username}:{password}@"

return start + f"{REDIS_HOST}:{REDIS_PORT}"


REDIS_URL = format_redis_conn_from_env()

# Vector Index Configuration
INDEX_NAME = os.getenv("INDEX_NAME", "rag-redis")


current_file_path = os.path.abspath(__file__)
parent_dir = os.path.dirname(current_file_path)
REDIS_SCHEMA = os.getenv("REDIS_SCHEMA", "redis_schema.yml")
INDEX_SCHEMA = os.path.join(parent_dir, REDIS_SCHEMA)
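
As a rough usage sketch (assuming `comps` is on `PYTHONPATH`), exporting `REDIS_SSL=true`, `REDIS_PASSWORD=secret`, `REDIS_HOST=my-redis`, and `REDIS_PORT=6380` before the import would make `REDIS_URL` resolve to `rediss://default:secret@my-redis:6380`:

```python
# Rough sketch: inspect the connection string built by redis_config. Export the env vars
# before this import, since REDIS_HOST/REDIS_PORT are read at import time.
from comps.retrievers.llamaindex import redis_config

print(redis_config.REDIS_URL)     # e.g. rediss://default:secret@my-redis:6380
print(redis_config.INDEX_SCHEMA)  # absolute path to redis_schema.yml next to the module
```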