Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove ingest in Retriever MS #270

Merged
merged 5 commits into from
Jul 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion comps/retrievers/langchain/redis/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ export LANGCHAIN_PROJECT="opea/retrievers"

```bash
cd ../../
docker build -t opea/retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/langchain/docker/Dockerfile .
docker build -t opea/retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/langchain/redis/docker/Dockerfile .
```

To start a docker container, you have two options:
Expand Down
4 changes: 1 addition & 3 deletions comps/retrievers/langchain/redis/docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@ RUN useradd -m -s /bin/bash user && \

COPY comps /home/user/comps

RUN chmod +x /home/user/comps/retrievers/langchain/redis/run.sh

USER user

RUN pip install --no-cache-dir --upgrade pip && \
Expand All @@ -29,4 +27,4 @@ ENV PYTHONPATH=$PYTHONPATH:/home/user

WORKDIR /home/user/comps/retrievers/langchain/redis

ENTRYPOINT ["/home/user/comps/retrievers/langchain/redis/run.sh"]
ENTRYPOINT ["python", "retriever_redis.py"]
16 changes: 9 additions & 7 deletions comps/retrievers/langchain/redis/retriever_redis.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings
from langchain_community.vectorstores import Redis
from langsmith import traceable
from redis_config import EMBED_MODEL, INDEX_NAME, INDEX_SCHEMA, REDIS_URL
from redis_config import EMBED_MODEL, INDEX_NAME, REDIS_URL

from comps import (
EmbedDoc768,
Expand All @@ -34,6 +34,13 @@
@register_statistics(names=["opea_service@retriever_redis"])
def retrieve(input: EmbedDoc768) -> SearchedDoc:
start = time.time()
# check if the Redis index has data
if vector_db.client.keys() == []:
result = SearchedDoc(retrieved_docs=[], initial_query=input.text)
statistics_dict["opea_service@retriever_redis"].append_latency(time.time() - start, None)
return result

# if the Redis index has data, perform the search
if input.search_type == "similarity":
search_res = vector_db.similarity_search_by_vector(embedding=input.embedding, k=input.k)
elif input.search_type == "similarity_distance_threshold":
Expand Down Expand Up @@ -68,10 +75,5 @@ def retrieve(input: EmbedDoc768) -> SearchedDoc:
# create embeddings using local embedding model
embeddings = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL)

vector_db = Redis.from_existing_index(
embedding=embeddings,
index_name=INDEX_NAME,
redis_url=REDIS_URL,
schema=INDEX_SCHEMA,
)
vector_db = Redis(embedding=embeddings, index_name=INDEX_NAME, redis_url=REDIS_URL)
opea_microservices["opea_service@retriever_redis"].start()
9 changes: 0 additions & 9 deletions comps/retrievers/langchain/redis/run.sh

This file was deleted.

31 changes: 24 additions & 7 deletions tests/test_retrievers_langchain_redis.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,17 @@
set -xe

WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')

# Build the opea/retriever-redis:comps image used by this test.
# Reads WORKPATH (directory used as the docker build context) and the
# https_proxy / http_proxy environment variables, which are forwarded to the
# build as --build-arg values.
function build_docker_images() {
    # Quote every expansion so a path or proxy value containing whitespace or
    # glob characters is passed as a single word instead of being split.
    cd "$WORKPATH"
    docker build --no-cache -t opea/retriever-redis:comps \
        --build-arg https_proxy="$https_proxy" \
        --build-arg http_proxy="$http_proxy" \
        -f comps/retrievers/langchain/redis/docker/Dockerfile .
}

function start_service() {
# redis
docker run -d --name test-redis-vector-db -p 5010:6379 -p 5011:8001 -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy redis/redis-stack:7.2.0-v9
docker run -d --name test-comps-retriever-redis-vector-db -p 5010:6379 -p 5011:8001 -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy redis/redis-stack:7.2.0-v9
sleep 10s

# tei endpoint
Expand All @@ -37,13 +39,28 @@ function validate_microservice() {
retriever_port=5009
export PATH="${HOME}/miniforge3/bin:$PATH"
source activate
URL="http://${ip_address}:$retriever_port/v1/retrieval"
test_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
http_proxy='' curl http://${ip_address}:$retriever_port/v1/retrieval \
-X POST \
-d "{\"text\":\"test\",\"embedding\":${test_embedding}}" \
-H 'Content-Type: application/json'
docker logs test-comps-retriever-redis-server
docker logs test-comps-retriever-tei-endpoint

HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" -H 'Content-Type: application/json' "$URL")
if [ "$HTTP_STATUS" -eq 200 ]; then
echo "[ retriever ] HTTP status is 200. Checking content..."
local CONTENT=$(curl -s -X POST -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/retriever.log)

if echo "$CONTENT" | grep -q "retrieved_docs"; then
echo "[ retriever ] Content is as expected."
else
echo "[ retriever ] Content does not match the expected result: $CONTENT"
docker logs test-comps-retriever-redis-server >> ${LOG_PATH}/retriever.log
exit 1
fi
else
echo "[ retriever ] HTTP status is not 200. Received status was $HTTP_STATUS"
docker logs test-comps-retriever-redis-server >> ${LOG_PATH}/retriever.log
exit 1
fi

docker logs test-comps-retriever-tei-endpoint >> ${LOG_PATH}/tei.log
}

function stop_docker() {
Expand Down