Skip to content

Commit

Permalink
Merge branch 'main' into suyue/cd_wf
Browse files Browse the repository at this point in the history
  • Loading branch information
chensuyue authored Aug 14, 2024
2 parents 3923aeb + e71aba0 commit 2368c43
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 127 deletions.
107 changes: 39 additions & 68 deletions ChatQnA/docker/xeon/README_qdrant.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,37 +20,41 @@ qdrant-vector-db
Port 6333 - Open to 0.0.0.0/0
Port 6334 - Open to 0.0.0.0/0
dataprep-qdrant-server
======================
Port 6043 - Open to 0.0.0.0/0
tei_embedding_service
=====================
Port 6006 - Open to 0.0.0.0/0
Port 6040 - Open to 0.0.0.0/0
embedding
=========
Port 6000 - Open to 0.0.0.0/0
Port 6044 - Open to 0.0.0.0/0
retriever
=========
Port 7000 - Open to 0.0.0.0/0
Port 6045 - Open to 0.0.0.0/0
tei_xeon_service
tei_reranking_service
================
Port 8808 - Open to 0.0.0.0/0
Port 6041 - Open to 0.0.0.0/0
reranking
=========
Port 8000 - Open to 0.0.0.0/0
Port 6046 - Open to 0.0.0.0/0
tgi-service
===========
Port 9009 - Open to 0.0.0.0/0
Port 6042 - Open to 0.0.0.0/0
llm
===
Port 9000 - Open to 0.0.0.0/0
Port 6047 - Open to 0.0.0.0/0
chaqna-xeon-backend-server
==========================
Port 8888 - Open to 0.0.0.0/0
Port 8912 - Open to 0.0.0.0/0
chaqna-xeon-ui-server
=====================
Expand Down Expand Up @@ -126,10 +130,9 @@ Build frontend Docker image that enables Conversational experience with ChatQnA

```bash
cd GenAIExamples/ChatQnA/docker/ui/
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get_file"
docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg DATAPREP_SERVICE_ENDPOINT=$DATAPREP_SERVICE_ENDPOINT --build-arg DATAPREP_GET_FILE_ENDPOINT=$DATAPREP_GET_FILE_ENDPOINT -f ./docker/Dockerfile.react .
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8912/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6043/v1/dataprep"
docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg DATAPREP_SERVICE_ENDPOINT=$DATAPREP_SERVICE_ENDPOINT -f ./docker/Dockerfile.react .
cd ../../../..
```

Expand Down Expand Up @@ -178,9 +181,9 @@ export https_proxy=${your_http_proxy}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
export TGI_LLM_ENDPOINT="http://${host_ip}:9009"
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6040"
export TEI_RERANKING_ENDPOINT="http://${host_ip}:6041"
export TGI_LLM_ENDPOINT="http://${host_ip}:6042"
export QDRANT_HOST=${host_ip}
export QDRANT_PORT=6333
export INDEX_NAME="rag-qdrant"
Expand All @@ -190,10 +193,8 @@ export EMBEDDING_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
export RERANK_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get_file"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete_file"
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8912/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6043/v1/dataprep"
```

Note: Please replace `host_ip` with your external IP address; do not use localhost.
Expand All @@ -204,15 +205,15 @@ Note: Please replace with `host_ip` with you external IP address, do not use loc
```bash
cd GenAIExamples/ChatQnA/docker/xeon/
docker compose up -d
docker compose -f compose_qdrant.yaml up -d
```

### Validate Microservices

1. TEI Embedding Service

```bash
curl ${host_ip}:6006/embed \
curl ${host_ip}:6040/embed \
-X POST \
-d '{"inputs":"What is Deep Learning?"}' \
-H 'Content-Type: application/json'
Expand All @@ -221,25 +222,23 @@ curl ${host_ip}:6006/embed \
2. Embedding Microservice

```bash
curl http://${host_ip}:6000/v1/embeddings\
curl http://${host_ip}:6044/v1/embeddings\
-X POST \
-d '{"text":"hello"}' \
-H 'Content-Type: application/json'
```

3. Retriever Microservice
To validate the retriever microservice, you need to generate a mock embedding vector of length 768 in Python script:
3. Retriever Microservice

```Python
import random
embedding = [random.uniform(-1, 1) for _ in range(768)]
print(embedding)
```
To consume the retriever microservice, you need to generate a mock embedding vector with a Python script. The length of the embedding vector
is determined by the embedding model.
Here we use the model `EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"`, whose vector size is 768.

Then substitute your mock embedding vector for the `${your_embedding}` in the following cURL command:
Check the vector dimension of your embedding model and set the dimension of `your_embedding` equal to it.

```bash
curl http://${host_ip}:7000/v1/retrieval \
export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
curl http://${host_ip}:6045/v1/retrieval \
-X POST \
-d '{"text":"What is the revenue of Nike in 2023?","embedding":"'"${your_embedding}"'"}' \
-H 'Content-Type: application/json'
Expand All @@ -248,7 +247,7 @@ curl http://${host_ip}:7000/v1/retrieval \
4. TEI Reranking Service

```bash
curl http://${host_ip}:8808/rerank \
curl http://${host_ip}:6041/rerank \
-X POST \
-d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \
-H 'Content-Type: application/json'
Expand All @@ -257,7 +256,7 @@ curl http://${host_ip}:8808/rerank \
5. Reranking Microservice

```bash
curl http://${host_ip}:8000/v1/reranking\
curl http://${host_ip}:6046/v1/reranking\
-X POST \
-d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
-H 'Content-Type: application/json'
Expand All @@ -266,7 +265,7 @@ curl http://${host_ip}:8000/v1/reranking\
6. TGI Service

```bash
curl http://${host_ip}:9009/generate \
curl http://${host_ip}:6042/generate \
-X POST \
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-H 'Content-Type: application/json'
Expand All @@ -275,7 +274,7 @@ curl http://${host_ip}:9009/generate \
7. LLM Microservice

```bash
curl http://${host_ip}:9000/v1/chat/completions\
curl http://${host_ip}:6047/v1/chat/completions\
-X POST \
-d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
-H 'Content-Type: application/json'
Expand All @@ -284,7 +283,7 @@ curl http://${host_ip}:9000/v1/chat/completions\
8. MegaService

```bash
curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
curl http://${host_ip}:8912/v1/chatqna -H "Content-Type: application/json" -d '{
"messages": "What is the revenue of Nike in 2023?"
}'
```
Expand All @@ -296,49 +295,21 @@ If you want to update the default knowledge base, you can use the following comm
Update Knowledge Base via Local File Upload:

```bash
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
curl -X POST "http://${host_ip}:6043/v1/dataprep" \
-H "Content-Type: multipart/form-data" \
-F "files=@./nke-10k-2023.pdf"
-F "files=@./your_file.pdf"
```

This command updates a knowledge base by uploading a local file for processing. Update the file path according to your environment.

Add Knowledge Base via HTTP Links:

```bash
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
curl -X POST "http://${host_ip}:6043/v1/dataprep" \
-H "Content-Type: multipart/form-data" \
-F 'link_list=["https://opea.dev"]'
```

This command updates a knowledge base by submitting a list of HTTP links for processing.

Also, you are able to get the file list that you uploaded:

```bash
curl -X POST "http://${host_ip}:6008/v1/dataprep/get_file" \
-H "Content-Type: application/json"
```

To delete the file/link you uploaded:

```bash
# delete link
curl -X POST "http://${host_ip}:6009/v1/dataprep/delete_file" \
-d '{"file_path": "https://opea.dev"}' \
-H "Content-Type: application/json"

# delete file
curl -X POST "http://${host_ip}:6009/v1/dataprep/delete_file" \
-d '{"file_path": "nke-10k-2023.pdf"}' \
-H "Content-Type: application/json"

# delete all uploaded files and links
curl -X POST "http://${host_ip}:6009/v1/dataprep/delete_file" \
-d '{"file_path": "all"}' \
-H "Content-Type: application/json"
```

## 🚀 Launch the UI

To access the frontend, open the following URL in your browser: http://{host_ip}:5173. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below:
Expand Down
38 changes: 16 additions & 22 deletions ChatQnA/docker/xeon/compose_qdrant.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,21 @@ services:
- qdrant-vector-db
- tei-embedding-service
ports:
- "6000:6000"
- "6043:6007"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
QDRANT: ${host_ip}
QDRANT_HOST: ${QDRANT_HOST}
QDRANT_PORT: 6333
COLLECTION_NAME: ${INDEX_NAME}
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-embedding-server
ports:
- "6006:80"
- "6040:80"
volumes:
- "./data:/data"
shm_size: 1g
Expand All @@ -47,39 +47,35 @@ services:
depends_on:
- tei-embedding-service
ports:
- "6000:6000"
- "6044:6000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-embedding-service"
restart: unless-stopped
retriever:
image: opea/retriever-qdrant:latest
container_name: retriever-qdrant-server
depends_on:
- qdrant-vector-db
ports:
- "7000:7000"
- "6045:7000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
QDRANT_HOST: ${host_ip}
QDRANT_HOST: ${QDRANT_HOST}
QDRANT_PORT: 6333
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
container_name: tei-reranking-server
ports:
- "8808:80"
- "6041:80"
volumes:
- "./data:/data"
shm_size: 1g
Expand All @@ -97,7 +93,7 @@ services:
depends_on:
- tei-reranking-service
ports:
- "8000:8000"
- "6046:8000"
ipc: host
environment:
no_proxy: ${no_proxy}
Expand All @@ -107,15 +103,12 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-reranking-service"
restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:2.1.0
container_name: tgi-service
ports:
- "9009:80"
- "6042:80"
volumes:
- "./data:/data"
shm_size: 1g
Expand All @@ -133,7 +126,7 @@ services:
depends_on:
- tgi-service
ports:
- "9000:9000"
- "6047:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
Expand All @@ -143,9 +136,6 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-llm-service"
restart: unless-stopped
chaqna-xeon-backend-server:
image: opea/chatqna:latest
Expand All @@ -160,16 +150,20 @@ services:
- tgi-service
- llm
ports:
- "8888:8888"
- "8912:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
- EMBEDDING_SERVICE_PORT=${EMBEDDING_SERVICE_PORT}
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
- RETRIEVER_SERVICE_PORT=${RETRIEVER_SERVICE_PORT}
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
- RERANK_SERVICE_PORT=${RERANK_SERVICE_PORT}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
- LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
ipc: host
restart: always
chaqna-xeon-ui-server:
Expand Down
Loading

0 comments on commit 2368c43

Please sign in to comment.