EdgeCraftRAG: Fix multiple issues (#1143)
Signed-off-by: Mingyuan Qi <mingyuan.qi@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
myqi and pre-commit-ci[bot] authored Nov 15, 2024
1 parent 6f8fa6a commit 096a37a
Showing 26 changed files with 335 additions and 302 deletions.
6 changes: 2 additions & 4 deletions EdgeCraftRAG/Dockerfile
@@ -13,13 +13,11 @@ RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
chown -R user /home/user/

COPY ./edgecraftrag /home/user/edgecraftrag
COPY ./requirements.txt /home/user/requirements.txt
COPY ./chatqna.py /home/user/chatqna.py

WORKDIR /home/user/edgecraftrag
RUN pip install --no-cache-dir -r requirements.txt

WORKDIR /home/user
RUN pip install --no-cache-dir -r requirements.txt

USER user

3 changes: 3 additions & 0 deletions EdgeCraftRAG/Dockerfile.server
@@ -25,6 +25,9 @@ RUN useradd -m -s /bin/bash user && \

COPY ./edgecraftrag /home/user/edgecraftrag

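# Pre-create a writable Gradio cache directory and point GRADIO_TEMP_DIR at it
# so the non-root user can stage files uploaded through the UI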
RUN mkdir -p /home/user/gradio_cache
ENV GRADIO_TEMP_DIR=/home/user/gradio_cache

WORKDIR /home/user/edgecraftrag
RUN pip install --no-cache-dir -r requirements.txt

171 changes: 101 additions & 70 deletions EdgeCraftRAG/README.md
@@ -7,39 +7,112 @@ quality and performance.

## Quick Start Guide

### Run Containers with Docker Compose
### (Optional) Build Docker Images for Mega Service, Server and UI on your own

If you want to build the images on your own, follow these steps:

```bash
cd GenAIExamples/EdgeCraftRAG

docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --build-arg no_proxy=$no_proxy -t opea/edgecraftrag:latest -f Dockerfile .
docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --build-arg no_proxy=$no_proxy -t opea/edgecraftrag-server:latest -f Dockerfile.server .
docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --build-arg no_proxy=$no_proxy -t opea/edgecraftrag-ui:latest -f ui/docker/Dockerfile.ui .
```
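
If the builds succeed, the new images should show up locally; a quick sanity check:

```bash
docker images | grep edgecraftrag
```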

### Using Intel Arc GPU

#### Local inference with OpenVINO for Intel Arc GPU

You can select the "local" type in the generation field, which is the default approach to enable Intel Arc GPU for LLM inference. You don't need to build extra images for the "local" type.

#### vLLM with OpenVINO for Intel Arc GPU

You can also select "vLLM" as the generation type. To enable it, you need to build the vLLM image for Intel Arc GPU before service bootstrap.
Please follow [vLLM with OpenVINO](https://github.com/opea-project/GenAIComps/tree/main/comps/llms/text-generation/vllm/langchain#build-docker-image) to build the vLLM image.
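
The compose file used later expects the vLLM image to be available as `opea/vllm-arc:latest`. The snippet below is only a rough sketch of that build, assuming the Dockerfile location from the linked guide; follow that guide for the authoritative steps.

```bash
# Sketch only: build the vLLM-with-OpenVINO image for Intel Arc and tag it to match compose.yaml.
# The repository path and Dockerfile name below are assumptions; see the linked guide for exact steps.
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps/comps/llms/text-generation/vllm/langchain
docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy \
  -t opea/vllm-arc:latest -f Dockerfile .
```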

### Start Edge Craft RAG Services with Docker Compose

If you want to enable vLLM with OpenVINO service, please finish the steps in [Launch vLLM with OpenVINO service](#optional-launch-vllm-with-openvino-service) first.

```bash
cd GenAIExamples/EdgeCraftRAG/docker_compose/intel/gpu/arc

export MODEL_PATH="your model path for all your models"
export DOC_PATH="your doc path for uploading a dir of files"
export GRADIO_PATH="your gradio cache path for transferring files"

# Make sure all 3 folders are owned by 1000:1000; if not, run:
# chown 1000:1000 ${MODEL_PATH} ${DOC_PATH} ${GRADIO_PATH}

# Use `ip a` to check your active ip
export HOST_IP="your host ip"
export UI_SERVICE_PORT="port for UI service"

# Optional for vllm endpoint
export vLLM_ENDPOINT="http://${HOST_IP}:8008"
# Check group id of video and render
export VIDEOGROUPID=$(getent group video | cut -d: -f3)
export RENDERGROUPID=$(getent group render | cut -d: -f3)

# If you have a proxy configured, uncomment the lines below
# export no_proxy=$no_proxy,${HOST_IP},edgecraftrag,edgecraftrag-server
# export no_proxy=${no_proxy},${HOST_IP},edgecraftrag,edgecraftrag-server
# export NO_PROXY=${NO_PROXY},${HOST_IP},edgecraftrag,edgecraftrag-server
# If you have an HF mirror configured, it will be imported into the container
# export HF_ENDPOINT="your HF mirror endpoint"

# The container ports are set by default; uncomment and edit the lines below to change them
# export MEGA_SERVICE_PORT=16011
# export PIPELINE_SERVICE_PORT=16010
# export UI_SERVICE_PORT="8082"

# Prepare models for embedding, reranking and generation; you can also choose other OpenVINO-optimized models
# Here is an example:
pip install --upgrade --upgrade-strategy eager "optimum[openvino]"

optimum-cli export openvino -m BAAI/bge-small-en-v1.5 ${MODEL_PATH}/BAAI/bge-small-en-v1.5 --task sentence-similarity
optimum-cli export openvino -m BAAI/bge-reranker-large ${MODEL_PATH}/BAAI/bge-reranker-large --task sentence-similarity
optimum-cli export openvino -m Qwen/Qwen2-7B-Instruct ${MODEL_PATH}/Qwen/Qwen2-7B-Instruct/INT4_compressed_weights --weight-format int4

docker compose up -d

```
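
Once the containers are up, you can confirm the services are reachable (assuming the default ports) before moving on:

```bash
docker compose ps
# The pipeline server listens on 16010 by default
curl -sf http://${HOST_IP}:16010/v1/settings/pipelines | jq '.'
```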

### (Optional) Build Docker Images for Mega Service, Server and UI by your own
#### (Optional) Launch vLLM with OpenVINO service

1. Set up Environment Variables

```bash
cd GenAIExamples/EdgeCraftRAG
export LLM_MODEL="your model id"
export VLLM_SERVICE_PORT=8008
export vLLM_ENDPOINT="http://${HOST_IP}:${VLLM_SERVICE_PORT}"
export HUGGINGFACEHUB_API_TOKEN="your HF token"
```

docker build --build-arg http_proxy=$HTTP_PROXY --build-arg https_proxy=$HTTPS_PROXY --build-arg no_proxy=$NO_PROXY -t opea/edgecraftrag:latest -f Dockerfile .
docker build --build-arg http_proxy=$HTTP_PROXY --build-arg https_proxy=$HTTPS_PROXY --build-arg no_proxy=$NO_PROXY -t opea/edgecraftrag-server:latest -f Dockerfile.server .
docker build --build-arg http_proxy=$HTTP_PROXY --build-arg https_proxy=$HTTPS_PROXY --build-arg no_proxy=$NO_PROXY -t opea/edgecraftrag-ui:latest -f ui/docker/Dockerfile.ui .
2. Uncomment the code below in `GenAIExamples/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml`

```bash
# vllm-openvino-server:
# container_name: vllm-openvino-server
# image: opea/vllm-arc:latest
# ports:
# - ${VLLM_SERVICE_PORT:-8008}:80
# environment:
# HTTPS_PROXY: ${https_proxy}
# HTTP_PROXY: ${http_proxy}
# VLLM_OPENVINO_DEVICE: GPU
# HF_ENDPOINT: ${HF_ENDPOINT}
# HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
# volumes:
# - /dev/dri/by-path:/dev/dri/by-path
# - $HOME/.cache/huggingface:/root/.cache/huggingface
# devices:
# - /dev/dri
# entrypoint: /bin/bash -c "\
# cd / && \
# export VLLM_CPU_KVCACHE_SPACE=50 && \
# export VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=ON && \
# python3 -m vllm.entrypoints.openai.api_server \
# --model '${LLM_MODEL}' \
# --max_model_len=1024 \
# --host 0.0.0.0 \
# --port 80"
```
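
After uncommenting the service, rerun `docker compose up -d` in the same directory to start the vLLM container. A minimal smoke test, assuming the OpenAI-compatible API is exposed on `${VLLM_SERVICE_PORT}`:

```bash
# List the models served by vLLM (standard OpenAI-compatible endpoint)
curl -s ${vLLM_ENDPOINT}/v1/models | jq '.'
```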

### ChatQnA with LLM Example (Command Line)
@@ -109,7 +182,7 @@ curl -X POST http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: app
# }

# Prepare data from local directory
curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d '{"local_path":"#REPLACE WITH YOUR LOCAL DOC DIR#"}' | jq '.'
curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d '{"local_path":"docs/#REPLACE WITH YOUR DIR WITHIN MOUNTED DOC PATH#"}' | jq '.'

# Validate Mega Service
curl -X POST http://${HOST_IP}:16011/v1/chatqna -H "Content-Type: application/json" -d '{"messages":"#REPLACE WITH YOUR QUESTION HERE#", "top_n":5, "max_tokens":512}' | jq '.'
@@ -121,33 +194,14 @@ Open your browser, access http://${HOST_IP}:8082

> Your browser should be running on the same host as your console; otherwise, you will need to access the UI with your host domain name instead of ${HOST_IP}.
### (Optional) Launch vLLM with OpenVINO service
To create a default pipeline, click the `Create Pipeline` button on the `RAG Settings` page. You can also create additional pipelines or update existing ones through `Pipeline Configuration`, but note that an active pipeline cannot be updated.
![create_pipeline](assets/img/create_pipeline.png)

```bash
# 1. export LLM_MODEL
export LLM_MODEL="your model id"
# 2. Uncomment below code in 'GenAIExamples/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml'
# vllm-service:
# image: vllm:openvino
# container_name: vllm-openvino-server
# depends_on:
# - vllm-service
# ports:
# - "8008:80"
# environment:
# no_proxy: ${no_proxy}
# http_proxy: ${http_proxy}
# https_proxy: ${https_proxy}
# vLLM_ENDPOINT: ${vLLM_ENDPOINT}
# LLM_MODEL: ${LLM_MODEL}
# entrypoint: /bin/bash -c "\
# cd / && \
# export VLLM_CPU_KVCACHE_SPACE=50 && \
# python3 -m vllm.entrypoints.openai.api_server \
# --model '${LLM_MODEL}' \
# --host 0.0.0.0 \
# --port 80"
```
After creating the pipeline, you can upload your data on the `Chatbot` page.
![upload_data](assets/img/upload_data.png)

Then, you can submit messages in the chat box.
![chat_with_rag](assets/img/chat_with_rag.png)

## Advanced User Guide

@@ -156,27 +210,13 @@ export LLM_MODEL="your model id"
#### Create a pipeline

```bash
curl -X POST http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: application/json" -d @examples/test_pipeline.json | jq '.'
```

It will take some time to prepare the embedding model.

#### Upload a text

```bash
curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d @examples/test_data.json | jq '.'
```

#### Provide a query to retrieve context with similarity search.

```bash
curl -X POST http://${HOST_IP}:16010/v1/retrieval -H "Content-Type: application/json" -d @examples/test_query.json | jq '.'
curl -X POST http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: application/json" -d @tests/test_pipeline_local_llm.json | jq '.'
```

#### Create the second pipeline test2
#### Update a pipeline

```bash
curl -X POST http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: application/json" -d @examples/test_pipeline2.json | jq '.'
curl -X PATCH http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: application/json" -d @tests/test_pipeline_local_llm.json | jq '.'
```

#### Check all pipelines
@@ -185,27 +225,18 @@ curl -X POST http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: app
curl -X GET http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: application/json" | jq '.'
```

#### Compare similarity retrieval (test1) and keyword retrieval (test2)
#### Activate a pipeline

```bash
# Activate pipeline test1
curl -X PATCH http://${HOST_IP}:16010/v1/settings/pipelines/test1 -H "Content-Type: application/json" -d '{"active": "true"}' | jq '.'
# Similarity retrieval
curl -X POST http://${HOST_IP}:16010/v1/retrieval -H "Content-Type: application/json" -d '{"messages":"number"}' | jq '.'

# Activate pipeline test2
curl -X PATCH http://${HOST_IP}:16010/v1/settings/pipelines/test2 -H "Content-Type: application/json" -d '{"active": "true"}' | jq '.'
# Keyword retrieval
curl -X POST http://${HOST_IP}:16010/v1/retrieval -H "Content-Type: application/json" -d '{"messages":"number"}' | jq '.'

```

### Model Management

#### Load a model

```bash
curl -X POST http://${HOST_IP}:16010/v1/settings/models -H "Content-Type: application/json" -d @examples/test_model_load.json | jq '.'
curl -X POST http://${HOST_IP}:16010/v1/settings/models -H "Content-Type: application/json" -d '{"model_type": "reranker", "model_id": "BAAI/bge-reranker-large", "model_path": "./models/bge_ov_reranker", "device": "cpu"}' | jq '.'
```

It will take some time to load the model.
@@ -219,7 +250,7 @@ curl -X GET http://${HOST_IP}:16010/v1/settings/models -H "Content-Type: applica
#### Update a model

```bash
curl -X PATCH http://${HOST_IP}:16010/v1/settings/models/BAAI/bge-reranker-large -H "Content-Type: application/json" -d @examples/test_model_update.json | jq '.'
curl -X PATCH http://${HOST_IP}:16010/v1/settings/models/BAAI/bge-reranker-large -H "Content-Type: application/json" -d '{"model_type": "reranker", "model_id": "BAAI/bge-reranker-large", "model_path": "./models/bge_ov_reranker", "device": "gpu"}' | jq '.'
```

#### Check a certain model
@@ -239,14 +270,14 @@ curl -X DELETE http://${HOST_IP}:16010/v1/settings/models/BAAI/bge-reranker-larg
#### Add a text

```bash
curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d @examples/test_data.json | jq '.'
curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d '{"text":"#REPLACE WITH YOUR TEXT"}' | jq '.'
```

#### Add files from an existing file path

```bash
curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d @examples/test_data_dir.json | jq '.'
curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d @examples/test_data_file.json | jq '.'
curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d '{"local_path":"docs/#REPLACE WITH YOUR DIR WITHIN MOUNTED DOC PATH#"}' | jq '.'
curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d '{"local_path":"docs/#REPLACE WITH YOUR FILE WITHIN MOUNTED DOC PATH#"}' | jq '.'
```

#### Check all files
@@ -270,5 +301,5 @@ curl -X DELETE http://${HOST_IP}:16010/v1/data/files/test2.docx -H "Content-Type
#### Update a file

```bash
curl -X PATCH http://${HOST_IP}:16010/v1/data/files/test.pdf -H "Content-Type: application/json" -d @examples/test_data_file.json | jq '.'
curl -X PATCH http://${HOST_IP}:16010/v1/data/files/test.pdf -H "Content-Type: application/json" -d '{"local_path":"docs/#REPLACE WITH YOUR FILE WITHIN MOUNTED DOC PATH#"}' | jq '.'
```
Binary file added EdgeCraftRAG/assets/img/chat_with_rag.png
Binary file added EdgeCraftRAG/assets/img/create_pipeline.png
Binary file added EdgeCraftRAG/assets/img/upload_data.png
18 changes: 16 additions & 2 deletions EdgeCraftRAG/chatqna.py
@@ -18,6 +18,7 @@
ChatMessage,
UsageInfo,
)
from comps.cores.proto.docarray import LLMParams
from fastapi import Request
from fastapi.responses import StreamingResponse

@@ -30,7 +31,20 @@ def __init__(self, megaservice, host="0.0.0.0", port=16011):

async def handle_request(self, request: Request):
input = await request.json()
result_dict, runtime_graph = await self.megaservice.schedule(initial_inputs=input)
stream_opt = input.get("stream", False)
chat_request = ChatCompletionRequest.parse_obj(input)
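# Map optional fields from the incoming request onto LLM generation parameters, falling back to defaults when a field is absent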
parameters = LLMParams(
max_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024,
top_k=chat_request.top_k if chat_request.top_k else 10,
top_p=chat_request.top_p if chat_request.top_p else 0.95,
temperature=chat_request.temperature if chat_request.temperature else 0.01,
frequency_penalty=chat_request.frequency_penalty if chat_request.frequency_penalty else 0.0,
presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0,
repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
streaming=stream_opt,
chat_template=chat_request.chat_template if chat_request.chat_template else None,
)
result_dict, runtime_graph = await self.megaservice.schedule(initial_inputs=input, llm_parameters=parameters)
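# If any downstream node returned a streaming response, pass it straight through to the client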
for node, response in result_dict.items():
if isinstance(response, StreamingResponse):
return response
@@ -61,7 +75,7 @@ def add_remote_service(self):
port=PIPELINE_SERVICE_PORT,
endpoint="/v1/chatqna",
use_remote_service=True,
service_type=ServiceType.UNDEFINED,
service_type=ServiceType.LLM,
)
self.megaservice.add(edgecraftrag)
self.gateway = EdgeCraftRagGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)