diff --git a/EdgeCraftRAG/Dockerfile b/EdgeCraftRAG/Dockerfile
index 3c9711dea..2e6191a01 100644
--- a/EdgeCraftRAG/Dockerfile
+++ b/EdgeCraftRAG/Dockerfile
@@ -13,13 +13,11 @@ RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
chown -R user /home/user/
-COPY ./edgecraftrag /home/user/edgecraftrag
+COPY ./requirements.txt /home/user/requirements.txt
COPY ./chatqna.py /home/user/chatqna.py
-WORKDIR /home/user/edgecraftrag
-RUN pip install --no-cache-dir -r requirements.txt
-
WORKDIR /home/user
+RUN pip install --no-cache-dir -r requirements.txt
USER user
diff --git a/EdgeCraftRAG/Dockerfile.server b/EdgeCraftRAG/Dockerfile.server
index c04dc0a54..f076dcd16 100644
--- a/EdgeCraftRAG/Dockerfile.server
+++ b/EdgeCraftRAG/Dockerfile.server
@@ -25,6 +25,9 @@ RUN useradd -m -s /bin/bash user && \
COPY ./edgecraftrag /home/user/edgecraftrag
+RUN mkdir -p /home/user/gradio_cache
+ENV GRADIO_TEMP_DIR=/home/user/gradio_cache
+
WORKDIR /home/user/edgecraftrag
RUN pip install --no-cache-dir -r requirements.txt
diff --git a/EdgeCraftRAG/README.md b/EdgeCraftRAG/README.md
index da8d2efb0..a24822532 100644
--- a/EdgeCraftRAG/README.md
+++ b/EdgeCraftRAG/README.md
@@ -7,39 +7,112 @@ quality and performance.
## Quick Start Guide
-### Run Containers with Docker Compose
+### (Optional) Build Docker Images for Mega Service, Server and UI on your own
+
+If you want to build the images on your own, follow these steps:
+
+```bash
+cd GenAIExamples/EdgeCraftRAG
+
+docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --build-arg no_proxy=$no_proxy -t opea/edgecraftrag:latest -f Dockerfile .
+docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --build-arg no_proxy=$no_proxy -t opea/edgecraftrag-server:latest -f Dockerfile.server .
+docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --build-arg no_proxy=$no_proxy -t opea/edgecraftrag-ui:latest -f ui/docker/Dockerfile.ui .
+```
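+
+To confirm the images were built with the tags above, you can run a quick check:
+
+```bash
+docker images | grep edgecraftrag
+```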
+
+### Using Intel Arc GPU
+
+#### Local inference with OpenVINO for Intel Arc GPU
+
+You can select the "local" type in the generation field, which is the default approach to enabling the Intel Arc GPU for LLM inference. No image build is needed for the "local" type.
+
+#### vLLM with OpenVINO for Intel Arc GPU
+
+You can also select "vLLM" as the generation type. To enable it, you need to build the vLLM image for Intel Arc GPU before bootstrapping the services.
+Please follow [vLLM with OpenVINO](https://github.com/opea-project/GenAIComps/tree/main/comps/llms/text-generation/vllm/langchain#build-docker-image) to build the vLLM image.
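+
+A rough build sketch is shown below; the Dockerfile path inside GenAIComps is an assumption, so follow the linked guide for the authoritative steps. The resulting image should be tagged `opea/vllm-arc:latest` to match the compose file.
+
+```bash
+# Sketch only: the Dockerfile location is an assumption,
+# see the vLLM with OpenVINO guide linked above for the exact build steps.
+git clone https://github.com/opea-project/GenAIComps.git
+cd GenAIComps
+docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy \
+  -t opea/vllm-arc:latest \
+  -f comps/llms/text-generation/vllm/langchain/dependency/Dockerfile.intel_gpu .
+```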
+
+### Start Edge Craft RAG Services with Docker Compose
+
+If you want to enable the vLLM with OpenVINO service, please finish the steps in [Launch vLLM with OpenVINO service](#optional-launch-vllm-with-openvino-service) first.
```bash
cd GenAIExamples/EdgeCraftRAG/docker_compose/intel/gpu/arc
export MODEL_PATH="your model path for all your models"
export DOC_PATH="your doc path for uploading a dir of files"
+export GRADIO_PATH="your gradio cache path for transferring files"
+
+# Make sure all 3 folders are owned by 1000:1000; otherwise, run
+# chown 1000:1000 ${MODEL_PATH} ${DOC_PATH} ${GRADIO_PATH}
+
+# Use `ip a` to check your active IP address
export HOST_IP="your host ip"
-export UI_SERVICE_PORT="port for UI service"
-# Optional for vllm endpoint
-export vLLM_ENDPOINT="http://${HOST_IP}:8008"
+# Check the group IDs of video and render
+export VIDEOGROUPID=$(getent group video | cut -d: -f3)
+export RENDERGROUPID=$(getent group render | cut -d: -f3)
# If you have a proxy configured, uncomment below line
-# export no_proxy=$no_proxy,${HOST_IP},edgecraftrag,edgecraftrag-server
+# export no_proxy=${no_proxy},${HOST_IP},edgecraftrag,edgecraftrag-server
+# export NO_PROXY=${NO_PROXY},${HOST_IP},edgecraftrag,edgecraftrag-server
# If you have a HF mirror configured, it will be imported to the container
# export HF_ENDPOINT="your HF mirror endpoint"
# By default, the ports of the containers are set, uncomment if you want to change
# export MEGA_SERVICE_PORT=16011
# export PIPELINE_SERVICE_PORT=16011
+# export UI_SERVICE_PORT="8082"
+
+# Prepare models for embedding, reranking and generation; you can also choose other OpenVINO-optimized models.
+# For example:
+pip install --upgrade --upgrade-strategy eager "optimum[openvino]"
+
+optimum-cli export openvino -m BAAI/bge-small-en-v1.5 ${MODEL_PATH}/BAAI/bge-small-en-v1.5 --task sentence-similarity
+optimum-cli export openvino -m BAAI/bge-reranker-large ${MODEL_PATH}/BAAI/bge-reranker-large --task sentence-similarity
+optimum-cli export openvino -m Qwen/Qwen2-7B-Instruct ${MODEL_PATH}/Qwen/Qwen2-7B-Instruct/INT4_compressed_weights --weight-format int4
docker compose up -d
+
```
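+
+After the containers start, a quick sanity check is to confirm the pipeline service responds (assuming the default pipeline service port 16010):
+
+```bash
+docker ps
+curl -X GET http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: application/json" | jq '.'
+```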
-### (Optional) Build Docker Images for Mega Service, Server and UI by your own
+#### (Optional) Launch vLLM with OpenVINO service
+
+1. Set up Environment Variables
```bash
-cd GenAIExamples/EdgeCraftRAG
+export LLM_MODEL="your model id" # e.g. Qwen/Qwen2-7B-Instruct
+export VLLM_SERVICE_PORT=8008
+export vLLM_ENDPOINT="http://${HOST_IP}:${VLLM_SERVICE_PORT}"
+export HUGGINGFACEHUB_API_TOKEN="your HF token"
+```
-docker build --build-arg http_proxy=$HTTP_PROXY --build-arg https_proxy=$HTTPS_PROXY --build-arg no_proxy=$NO_PROXY -t opea/edgecraftrag:latest -f Dockerfile .
-docker build --build-arg http_proxy=$HTTP_PROXY --build-arg https_proxy=$HTTPS_PROXY --build-arg no_proxy=$NO_PROXY -t opea/edgecraftrag-server:latest -f Dockerfile.server .
-docker build --build-arg http_proxy=$HTTP_PROXY --build-arg https_proxy=$HTTPS_PROXY --build-arg no_proxy=$NO_PROXY -t opea/edgecraftrag-ui:latest -f ui/docker/Dockerfile.ui .
+2. Uncomment the `vllm-openvino-server` section in `GenAIExamples/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml`:
+
+```yaml
+ # vllm-openvino-server:
+ # container_name: vllm-openvino-server
+ # image: opea/vllm-arc:latest
+ # ports:
+ # - ${VLLM_SERVICE_PORT:-8008}:80
+ # environment:
+ # HTTPS_PROXY: ${https_proxy}
+  #     HTTP_PROXY: ${http_proxy}
+ # VLLM_OPENVINO_DEVICE: GPU
+ # HF_ENDPOINT: ${HF_ENDPOINT}
+ # HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+ # volumes:
+ # - /dev/dri/by-path:/dev/dri/by-path
+ # - $HOME/.cache/huggingface:/root/.cache/huggingface
+ # devices:
+ # - /dev/dri
+ # entrypoint: /bin/bash -c "\
+ # cd / && \
+ # export VLLM_CPU_KVCACHE_SPACE=50 && \
+ # export VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=ON && \
+ # python3 -m vllm.entrypoints.openai.api_server \
+ # --model '${LLM_MODEL}' \
+ # --max_model_len=1024 \
+ # --host 0.0.0.0 \
+ # --port 80"
```
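+
+3. Start the vLLM service and check that it is serving your model. A minimal check, assuming the service name above and vLLM's OpenAI-compatible `/v1/models` route:
+
+```bash
+docker compose up -d vllm-openvino-server
+curl http://${HOST_IP}:${VLLM_SERVICE_PORT}/v1/models | jq '.'
+```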
### ChatQnA with LLM Example (Command Line)
@@ -109,7 +182,7 @@ curl -X POST http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: app
# }
# Prepare data from local directory
-curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d '{"local_path":"#REPLACE WITH YOUR LOCAL DOC DIR#"}' | jq '.'
+curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d '{"local_path":"docs/#REPLACE WITH YOUR DIR WITHIN MOUNTED DOC PATH#"}' | jq '.'
# Validate Mega Service
curl -X POST http://${HOST_IP}:16011/v1/chatqna -H "Content-Type: application/json" -d '{"messages":"#REPLACE WITH YOUR QUESTION HERE#", "top_n":5, "max_tokens":512}' | jq '.'
@@ -121,33 +194,14 @@ Open your browser, access http://${HOST_IP}:8082
> Your browser should be running on the same host of your console, otherwise you will need to access UI with your host domain name instead of ${HOST_IP}.
-### (Optional) Launch vLLM with OpenVINO service
+To create a default pipeline, you need to click the `Create Pipeline` button on the `RAG Settings` page. You can also create multiple pipelines or update existing pipelines through the `Pipeline Configuration`, but please note that active pipelines cannot be updated.
+![create_pipeline](assets/img/create_pipeline.png)
-```bash
-# 1. export LLM_MODEL
-export LLM_MODEL="your model id"
-# 2. Uncomment below code in 'GenAIExamples/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml'
- # vllm-service:
- # image: vllm:openvino
- # container_name: vllm-openvino-server
- # depends_on:
- # - vllm-service
- # ports:
- # - "8008:80"
- # environment:
- # no_proxy: ${no_proxy}
- # http_proxy: ${http_proxy}
- # https_proxy: ${https_proxy}
- # vLLM_ENDPOINT: ${vLLM_ENDPOINT}
- # LLM_MODEL: ${LLM_MODEL}
- # entrypoint: /bin/bash -c "\
- # cd / && \
- # export VLLM_CPU_KVCACHE_SPACE=50 && \
- # python3 -m vllm.entrypoints.openai.api_server \
- # --model '${LLM_MODEL}' \
- # --host 0.0.0.0 \
- # --port 80"
-```
+After creating a pipeline, you can upload your data on the `Chatbot` page.
+![upload_data](assets/img/upload_data.png)
+
+Then, you can submit messages in the chat box.
+![chat_with_rag](assets/img/chat_with_rag.png)
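+
+Alternatively, the same chat can be driven from the command line. Below is a sketch of a streaming request to the mega service, assuming the default port 16011 and an active pipeline:
+
+```bash
+curl -X POST http://${HOST_IP}:16011/v1/chatqna -H "Content-Type: application/json" -d '{"messages":"#REPLACE WITH YOUR QUESTION HERE#", "stream": true, "max_tokens":512}'
+```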
## Advanced User Guide
@@ -156,27 +210,13 @@ export LLM_MODEL="your model id"
#### Create a pipeline
```bash
-curl -X POST http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: application/json" -d @examples/test_pipeline.json | jq '.'
-```
-
-It will take some time to prepare the embedding model.
-
-#### Upload a text
-
-```bash
-curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d @examples/test_data.json | jq '.'
-```
-
-#### Provide a query to retrieve context with similarity search.
-
-```bash
-curl -X POST http://${HOST_IP}:16010/v1/retrieval -H "Content-Type: application/json" -d @examples/test_query.json | jq '.'
+curl -X POST http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: application/json" -d @tests/test_pipeline_local_llm.json | jq '.'
```
-#### Create the second pipeline test2
+#### Update a pipeline
```bash
-curl -X POST http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: application/json" -d @examples/test_pipeline2.json | jq '.'
+curl -X PATCH http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: application/json" -d @tests/test_pipeline_local_llm.json | jq '.'
```
#### Check all pipelines
@@ -185,19 +225,10 @@ curl -X POST http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: app
curl -X GET http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: application/json" | jq '.'
```
-#### Compare similarity retrieval (test1) and keyword retrieval (test2)
+#### Activate a pipeline
```bash
-# Activate pipeline test1
curl -X PATCH http://${HOST_IP}:16010/v1/settings/pipelines/test1 -H "Content-Type: application/json" -d '{"active": "true"}' | jq '.'
-# Similarity retrieval
-curl -X POST http://${HOST_IP}:16010/v1/retrieval -H "Content-Type: application/json" -d '{"messages":"number"}' | jq '.'
-
-# Activate pipeline test2
-curl -X PATCH http://${HOST_IP}:16010/v1/settings/pipelines/test2 -H "Content-Type: application/json" -d '{"active": "true"}' | jq '.'
-# Keyword retrieval
-curl -X POST http://${HOST_IP}:16010/v1/retrieval -H "Content-Type: application/json" -d '{"messages":"number"}' | jq '.'
-
```
### Model Management
@@ -205,7 +236,7 @@ curl -X POST http://${HOST_IP}:16010/v1/retrieval -H "Content-Type: application/
#### Load a model
```bash
-curl -X POST http://${HOST_IP}:16010/v1/settings/models -H "Content-Type: application/json" -d @examples/test_model_load.json | jq '.'
+curl -X POST http://${HOST_IP}:16010/v1/settings/models -H "Content-Type: application/json" -d '{"model_type": "reranker", "model_id": "BAAI/bge-reranker-large", "model_path": "./models/bge_ov_reranker", "device": "cpu"}' | jq '.'
```
It will take some time to load the model.
@@ -219,7 +250,7 @@ curl -X GET http://${HOST_IP}:16010/v1/settings/models -H "Content-Type: applica
#### Update a model
```bash
-curl -X PATCH http://${HOST_IP}:16010/v1/settings/models/BAAI/bge-reranker-large -H "Content-Type: application/json" -d @examples/test_model_update.json | jq '.'
+curl -X PATCH http://${HOST_IP}:16010/v1/settings/models/BAAI/bge-reranker-large -H "Content-Type: application/json" -d '{"model_type": "reranker", "model_id": "BAAI/bge-reranker-large", "model_path": "./models/bge_ov_reranker", "device": "gpu"}' | jq '.'
```
#### Check a certain model
@@ -239,14 +270,14 @@ curl -X DELETE http://${HOST_IP}:16010/v1/settings/models/BAAI/bge-reranker-larg
#### Add a text
```bash
-curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d @examples/test_data.json | jq '.'
+curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d '{"text":"#REPLACE WITH YOUR TEXT#"}' | jq '.'
```
#### Add files from an existing file path
```bash
-curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d @examples/test_data_dir.json | jq '.'
-curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d @examples/test_data_file.json | jq '.'
+curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d '{"local_path":"docs/#REPLACE WITH YOUR DIR WITHIN MOUNTED DOC PATH#"}' | jq '.'
+curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d '{"local_path":"docs/#REPLACE WITH YOUR FILE WITHIN MOUNTED DOC PATH#"}' | jq '.'
```
#### Check all files
@@ -270,5 +301,5 @@ curl -X DELETE http://${HOST_IP}:16010/v1/data/files/test2.docx -H "Content-Type
#### Update a file
```bash
-curl -X PATCH http://${HOST_IP}:16010/v1/data/files/test.pdf -H "Content-Type: application/json" -d @examples/test_data_file.json | jq '.'
+curl -X PATCH http://${HOST_IP}:16010/v1/data/files/test.pdf -H "Content-Type: application/json" -d '{"local_path":"docs/#REPLACE WITH YOUR FILE WITHIN MOUNTED DOC PATH#"}' | jq '.'
```
diff --git a/EdgeCraftRAG/assets/img/chat_with_rag.png b/EdgeCraftRAG/assets/img/chat_with_rag.png
new file mode 100644
index 000000000..04000ef37
Binary files /dev/null and b/EdgeCraftRAG/assets/img/chat_with_rag.png differ
diff --git a/EdgeCraftRAG/assets/img/create_pipeline.png b/EdgeCraftRAG/assets/img/create_pipeline.png
new file mode 100644
index 000000000..53331b2b7
Binary files /dev/null and b/EdgeCraftRAG/assets/img/create_pipeline.png differ
diff --git a/EdgeCraftRAG/assets/img/upload_data.png b/EdgeCraftRAG/assets/img/upload_data.png
new file mode 100644
index 000000000..8fff43e68
Binary files /dev/null and b/EdgeCraftRAG/assets/img/upload_data.png differ
diff --git a/EdgeCraftRAG/chatqna.py b/EdgeCraftRAG/chatqna.py
index 1afa9621c..02f0a84dd 100644
--- a/EdgeCraftRAG/chatqna.py
+++ b/EdgeCraftRAG/chatqna.py
@@ -18,6 +18,7 @@
ChatMessage,
UsageInfo,
)
+from comps.cores.proto.docarray import LLMParams
from fastapi import Request
from fastapi.responses import StreamingResponse
@@ -30,7 +31,20 @@ def __init__(self, megaservice, host="0.0.0.0", port=16011):
async def handle_request(self, request: Request):
input = await request.json()
- result_dict, runtime_graph = await self.megaservice.schedule(initial_inputs=input)
+ stream_opt = input.get("stream", False)
+ chat_request = ChatCompletionRequest.parse_obj(input)
+ parameters = LLMParams(
+ max_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024,
+ top_k=chat_request.top_k if chat_request.top_k else 10,
+ top_p=chat_request.top_p if chat_request.top_p else 0.95,
+ temperature=chat_request.temperature if chat_request.temperature else 0.01,
+ frequency_penalty=chat_request.frequency_penalty if chat_request.frequency_penalty else 0.0,
+ presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0,
+ repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
+ streaming=stream_opt,
+ chat_template=chat_request.chat_template if chat_request.chat_template else None,
+ )
+ result_dict, runtime_graph = await self.megaservice.schedule(initial_inputs=input, llm_parameters=parameters)
for node, response in result_dict.items():
if isinstance(response, StreamingResponse):
return response
@@ -61,7 +75,7 @@ def add_remote_service(self):
port=PIPELINE_SERVICE_PORT,
endpoint="/v1/chatqna",
use_remote_service=True,
- service_type=ServiceType.UNDEFINED,
+ service_type=ServiceType.LLM,
)
self.megaservice.add(edgecraftrag)
self.gateway = EdgeCraftRagGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
diff --git a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml
index f877b7c58..a695fbc02 100644
--- a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml
+++ b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml
@@ -14,12 +14,15 @@ services:
volumes:
- ${MODEL_PATH:-${PWD}}:/home/user/models
- ${DOC_PATH:-${PWD}}:/home/user/docs
+ - ${GRADIO_PATH:-${PWD}}:/home/user/gradio_cache
+ - ${HF_CACHE:-${HOME}/.cache}:/home/user/.cache
ports:
- ${PIPELINE_SERVICE_PORT:-16010}:${PIPELINE_SERVICE_PORT:-16010}
devices:
- /dev/dri:/dev/dri
group_add:
- - video
+ - ${VIDEOGROUPID:-44}
+ - ${RENDERGROUPID:-109}
ecrag:
image: ${REGISTRY:-opea}/edgecraftrag:${TAG:-latest}
container_name: edgecraftrag
@@ -48,31 +51,42 @@ services:
PIPELINE_SERVICE_HOST_IP: ${PIPELINE_SERVICE_HOST_IP:-${HOST_IP}}
UI_SERVICE_PORT: ${UI_SERVICE_PORT:-8082}
UI_SERVICE_HOST_IP: ${UI_SERVICE_HOST_IP:-0.0.0.0}
+ volumes:
+ - ${GRADIO_PATH:-${PWD}}:/home/user/gradio_cache
ports:
- - ${UI_SERVICE_PORT:-8082}:${UI_SERVICE_PORT:-8082}
+ - ${UI_SERVICE_PORT:-8082}:${UI_SERVICE_PORT:-8082}
restart: always
depends_on:
- server
- ecrag
- # vllm-service:
- # image: vllm:openvino
+ # vllm-openvino-server:
# container_name: vllm-openvino-server
+ # image: opea/vllm-arc:latest
# ports:
- # - "8008:80"
+ # - ${VLLM_SERVICE_PORT:-8008}:80
# environment:
- # no_proxy: ${no_proxy}
- # http_proxy: ${http_proxy}
- # https_proxy: ${https_proxy}
- # vLLM_ENDPOINT: ${vLLM_ENDPOINT}
- # LLM_MODEL: ${LLM_MODEL}
+ # HTTPS_PROXY: ${https_proxy}
+      # HTTP_PROXY: ${http_proxy}
+ # VLLM_OPENVINO_DEVICE: GPU
+ # HF_ENDPOINT: ${HF_ENDPOINT}
+ # HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+ # volumes:
+ # - /dev/dri/by-path:/dev/dri/by-path
+ # - $HOME/.cache/huggingface:/root/.cache/huggingface
+ # devices:
+ # - /dev/dri
+ # group_add:
+ # - ${VIDEOGROUPID:-44}
+ # - ${RENDERGROUPID:-109}
# entrypoint: /bin/bash -c "\
# cd / && \
# export VLLM_CPU_KVCACHE_SPACE=50 && \
+ # export VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=ON && \
# python3 -m vllm.entrypoints.openai.api_server \
# --model '${LLM_MODEL}' \
+ # --max_model_len=1024 \
# --host 0.0.0.0 \
# --port 80"
-
networks:
default:
driver: bridge
diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py b/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py
index dfd32c29e..8249950d0 100644
--- a/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py
+++ b/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py
@@ -25,5 +25,8 @@ async def retrieval(request: ChatCompletionRequest):
# ChatQnA
@chatqna_app.post(path="/v1/chatqna")
async def chatqna(request: ChatCompletionRequest):
- ret = ctx.get_pipeline_mgr().run_pipeline(chat_request=request)
- return str(ret)
+ if request.stream:
+ return ctx.get_pipeline_mgr().run_pipeline(chat_request=request)
+ else:
+ ret = ctx.get_pipeline_mgr().run_pipeline(chat_request=request)
+ return str(ret)
diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py b/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py
index 9d008e82f..f58390cfd 100644
--- a/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py
+++ b/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py
@@ -157,16 +157,13 @@ def update_pipeline_handler(pl, req):
gen = req.generator
if gen.model is None:
return "No ChatQnA Model"
- if gen.inference_type == InferenceType.VLLM:
- if gen.model.model_id:
- model_ref = gen.model.model_id
- else:
- model_ref = gen.model.model_path
- pl.generator = QnAGenerator(model_ref, gen.prompt_path, gen.inference_type)
- elif gen.inference_type == InferenceType.LOCAL:
+ if gen.inference_type:
model = ctx.get_model_mgr().search_model(gen.model)
if model is None:
- gen.model.model_type = ModelType.LLM
+ if gen.inference_type == InferenceType.VLLM:
+ gen.model.model_type = ModelType.VLLM
+ else:
+ gen.model.model_type = ModelType.LLM
model = ctx.get_model_mgr().load_model(gen.model)
ctx.get_model_mgr().add(model)
# Use weakref to achieve model deletion and memory release
diff --git a/EdgeCraftRAG/edgecraftrag/api_schema.py b/EdgeCraftRAG/edgecraftrag/api_schema.py
index 1f124a7f9..5927e0304 100644
--- a/EdgeCraftRAG/edgecraftrag/api_schema.py
+++ b/EdgeCraftRAG/edgecraftrag/api_schema.py
@@ -10,6 +10,7 @@ class ModelIn(BaseModel):
model_type: Optional[str] = "LLM"
model_id: Optional[str]
model_path: Optional[str] = "./"
+ weight: Optional[str]
device: Optional[str] = "cpu"
diff --git a/EdgeCraftRAG/edgecraftrag/base.py b/EdgeCraftRAG/edgecraftrag/base.py
index d8c7aaef8..a163c486f 100644
--- a/EdgeCraftRAG/edgecraftrag/base.py
+++ b/EdgeCraftRAG/edgecraftrag/base.py
@@ -27,6 +27,7 @@ class ModelType(str, Enum):
EMBEDDING = "embedding"
RERANKER = "reranker"
LLM = "llm"
+ VLLM = "vllm"
class FileType(str, Enum):
diff --git a/EdgeCraftRAG/edgecraftrag/components/generator.py b/EdgeCraftRAG/edgecraftrag/components/generator.py
index cbfd6686d..a888bf18f 100644
--- a/EdgeCraftRAG/edgecraftrag/components/generator.py
+++ b/EdgeCraftRAG/edgecraftrag/components/generator.py
@@ -1,10 +1,11 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
+import asyncio
import dataclasses
import os
-from comps import GeneratedDoc, opea_telemetry
+from comps import GeneratedDoc
from edgecraftrag.base import BaseComponent, CompType, GeneratorType
from fastapi.responses import StreamingResponse
from langchain_core.prompts import PromptTemplate
@@ -12,18 +13,6 @@
from pydantic import model_serializer
-@opea_telemetry
-def post_process_text(text: str):
- if text == " ":
- return "data: @#$\n\n"
- if text == "\n":
-        return "data: <br/>\n\n"
- if text.isspace():
- return None
- new_text = text.replace(" ", "@#$")
- return f"data: {new_text}\n\n"
-
-
class QnAGenerator(BaseComponent):
def __init__(self, llm_model, prompt_template, inference_type, **kwargs):
@@ -76,8 +65,18 @@ def run(self, chat_request, retrieved_nodes, **kwargs):
repetition_penalty=chat_request.repetition_penalty,
)
self.llm().generate_kwargs = generate_kwargs
+ if chat_request.stream:
+
+ async def stream_generator():
+ response = self.llm().stream_complete(prompt_str)
+ for r in response:
+ yield r.delta
+ # Simulate asynchronous operation
+ await asyncio.sleep(0.01)
- return self.llm().complete(prompt_str)
+ return StreamingResponse(stream_generator(), media_type="text/event-stream")
+ else:
+ return self.llm().complete(prompt_str)
def run_vllm(self, chat_request, retrieved_nodes, **kwargs):
if self.llm is None:
@@ -92,7 +91,7 @@ def run_vllm(self, chat_request, retrieved_nodes, **kwargs):
prompt_str = self.prompt.format(input=query, context=text_gen_context)
llm_endpoint = os.getenv("vLLM_ENDPOINT", "http://localhost:8008")
- model_name = self.llm
+ model_name = self.llm().model_id
llm = OpenAILike(
api_key="fake",
api_base=llm_endpoint + "/v1",
@@ -106,12 +105,10 @@ def run_vllm(self, chat_request, retrieved_nodes, **kwargs):
if chat_request.stream:
async def stream_generator():
- response = await llm.astream_complete(prompt_str)
- async for text in response:
- output = text.text
- yield f"data: {output}\n\n"
-
- yield "data: [DONE]\n\n"
+ response = llm.stream_complete(prompt_str)
+ for text in response:
+ yield text.delta
+ await asyncio.sleep(0.01)
return StreamingResponse(stream_generator(), media_type="text/event-stream")
else:
@@ -122,7 +119,12 @@ async def stream_generator():
@model_serializer
def ser_model(self):
- set = {"idx": self.idx, "generator_type": self.comp_subtype, "model": self.model_id}
+ set = {
+ "idx": self.idx,
+ "generator_type": self.comp_subtype,
+ "inference_type": self.inference_type,
+ "model": self.llm(),
+ }
return set
diff --git a/EdgeCraftRAG/edgecraftrag/components/model.py b/EdgeCraftRAG/edgecraftrag/components/model.py
index 72ee7f16e..75fa69c41 100644
--- a/EdgeCraftRAG/edgecraftrag/components/model.py
+++ b/EdgeCraftRAG/edgecraftrag/components/model.py
@@ -14,6 +14,7 @@ class BaseModelComponent(BaseComponent):
model_id: Optional[str] = Field(default="")
model_path: Optional[str] = Field(default="")
+ weight: Optional[str] = Field(default="")
device: Optional[str] = Field(default="cpu")
def run(self, **kwargs) -> Any:
@@ -26,6 +27,7 @@ def ser_model(self):
"type": self.comp_subtype,
"model_id": self.model_id,
"model_path": self.model_path,
+ "weight": self.weight,
"device": self.device,
}
return set
@@ -33,7 +35,7 @@ def ser_model(self):
class OpenVINOEmbeddingModel(BaseModelComponent, OpenVINOEmbedding):
- def __init__(self, model_id, model_path, device):
+ def __init__(self, model_id, model_path, device, weight):
OpenVINOEmbedding.create_and_save_openvino_model(model_id, model_path)
OpenVINOEmbedding.__init__(self, model_id_or_path=model_path, device=device)
self.comp_type = CompType.MODEL
@@ -41,11 +43,12 @@ def __init__(self, model_id, model_path, device):
self.model_id = model_id
self.model_path = model_path
self.device = device
+ self.weight = ""
class OpenVINORerankModel(BaseModelComponent, OpenVINORerank):
- def __init__(self, model_id, model_path, device):
+ def __init__(self, model_id, model_path, device, weight):
OpenVINORerank.create_and_save_openvino_model(model_id, model_path)
OpenVINORerank.__init__(
self,
@@ -57,11 +60,12 @@ def __init__(self, model_id, model_path, device):
self.model_id = model_id
self.model_path = model_path
self.device = device
+ self.weight = ""
class OpenVINOLLMModel(BaseModelComponent, OpenVINOLLM):
- def __init__(self, model_id, model_path, device):
+ def __init__(self, model_id, model_path, device, weight):
OpenVINOLLM.__init__(
self,
model_id_or_path=model_path,
@@ -72,3 +76,4 @@ def __init__(self, model_id, model_path, device):
self.model_id = model_id
self.model_path = model_path
self.device = device
+ self.weight = weight
diff --git a/EdgeCraftRAG/edgecraftrag/components/pipeline.py b/EdgeCraftRAG/edgecraftrag/components/pipeline.py
index 4a2932e00..5af8b5cbe 100644
--- a/EdgeCraftRAG/edgecraftrag/components/pipeline.py
+++ b/EdgeCraftRAG/edgecraftrag/components/pipeline.py
@@ -110,8 +110,10 @@ def model_existed(self, model_id: str) -> bool:
return True
if self.generator:
llm = self.generator.llm
- if llm() and llm().model_id == model_id:
- return True
+ if isinstance(llm, str):
+ return llm == model_id
+ else:
+ return llm().model_id == model_id
return False
@@ -154,7 +156,8 @@ def run_test_generator(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any
if pl.generator is None:
return "No Generator Specified"
if pl.generator.inference_type == InferenceType.LOCAL:
- answer = pl.generator.run(chat_request, retri_res)
+ return pl.generator.run(chat_request, retri_res)
elif pl.generator.inference_type == InferenceType.VLLM:
- answer = pl.generator.run_vllm(chat_request, retri_res)
- return answer
+ return pl.generator.run_vllm(chat_request, retri_res)
+ else:
+ return "LLM inference_type not supported"
diff --git a/EdgeCraftRAG/edgecraftrag/controllers/modelmgr.py b/EdgeCraftRAG/edgecraftrag/controllers/modelmgr.py
index 73a77e48a..6d0166bc5 100644
--- a/EdgeCraftRAG/edgecraftrag/controllers/modelmgr.py
+++ b/EdgeCraftRAG/edgecraftrag/controllers/modelmgr.py
@@ -3,9 +3,14 @@
import asyncio
-from edgecraftrag.api_schema import IndexerIn, ModelIn, NodeParserIn
-from edgecraftrag.base import BaseComponent, BaseMgr, CallbackType, ModelType
-from edgecraftrag.components.model import OpenVINOEmbeddingModel, OpenVINOLLMModel, OpenVINORerankModel
+from edgecraftrag.api_schema import ModelIn
+from edgecraftrag.base import BaseComponent, BaseMgr, CompType, ModelType
+from edgecraftrag.components.model import (
+ BaseModelComponent,
+ OpenVINOEmbeddingModel,
+ OpenVINOLLMModel,
+ OpenVINORerankModel,
+)
class ModelMgr(BaseMgr):
@@ -78,17 +83,25 @@ def load_model(model_para: ModelIn):
model_id=model_para.model_id,
model_path=model_para.model_path,
device=model_para.device,
+ weight=model_para.weight,
)
case ModelType.RERANKER:
model = OpenVINORerankModel(
model_id=model_para.model_id,
model_path=model_para.model_path,
device=model_para.device,
+ weight=model_para.weight,
)
case ModelType.LLM:
model = OpenVINOLLMModel(
model_id=model_para.model_id,
model_path=model_para.model_path,
device=model_para.device,
+ weight=model_para.weight,
)
+ case ModelType.VLLM:
+ model = BaseModelComponent(model_id=model_para.model_id, model_path="", device="", weight="")
+ model.comp_type = CompType.MODEL
+ model.comp_subtype = ModelType.VLLM
+ model.model_id_or_path = model_para.model_id
return model
diff --git a/EdgeCraftRAG/edgecraftrag/prompt_template/default_prompt.txt b/EdgeCraftRAG/edgecraftrag/prompt_template/default_prompt.txt
index 800d1fa2f..aa57e6059 100644
--- a/EdgeCraftRAG/edgecraftrag/prompt_template/default_prompt.txt
+++ b/EdgeCraftRAG/edgecraftrag/prompt_template/default_prompt.txt
@@ -5,4 +5,4 @@
<|im_start|>System: Pay attention to your formatting of response. If you need to reference content from context, try to keep the formatting.<|im_end|>
<|im_start|>System: Try to summarize from the context, do some reasoning before response, then response. Make sure your response is logically sound and self-consistent.<|im_end|>
-<|im_start|>{input}
+<|im_start|>{input}
\ No newline at end of file
diff --git a/EdgeCraftRAG/edgecraftrag/requirements.txt b/EdgeCraftRAG/edgecraftrag/requirements.txt
index 3756c732a..6757aa752 100644
--- a/EdgeCraftRAG/edgecraftrag/requirements.txt
+++ b/EdgeCraftRAG/edgecraftrag/requirements.txt
@@ -1,6 +1,5 @@
docx2txt
faiss-cpu>=1.8.0.post1
-gradio>=4.44.1
langchain-core==0.2.29
llama-index>=0.11.0
llama-index-embeddings-openvino>=0.4.0
@@ -9,8 +8,4 @@ llama-index-llms-openvino>=0.3.1
llama-index-postprocessor-openvino-rerank>=0.3.0
llama-index-retrievers-bm25>=0.3.0
llama-index-vector-stores-faiss>=0.2.1
-loguru>=0.7.2
-omegaconf>=2.3.0
opea-comps>=0.9
-py-cpuinfo>=9.0.0
-uvicorn>=0.30.6
diff --git a/EdgeCraftRAG/requirements.txt b/EdgeCraftRAG/requirements.txt
new file mode 100644
index 000000000..5b27f1434
--- /dev/null
+++ b/EdgeCraftRAG/requirements.txt
@@ -0,0 +1,2 @@
+fastapi>=0.115.0
+opea-comps>=0.9
diff --git a/EdgeCraftRAG/tests/test_pipeline_local_llm.json b/EdgeCraftRAG/tests/test_pipeline_local_llm.json
index 18895d6e5..13485cebc 100644
--- a/EdgeCraftRAG/tests/test_pipeline_local_llm.json
+++ b/EdgeCraftRAG/tests/test_pipeline_local_llm.json
@@ -9,7 +9,6 @@
"indexer_type": "faiss_vector",
"embedding_model": {
"model_id": "BAAI/bge-small-en-v1.5",
- "model_path": "./models/bge_ov_embedding",
"device": "auto"
}
},
@@ -23,7 +22,6 @@
"top_n": 2,
"reranker_model": {
"model_id": "BAAI/bge-reranker-large",
- "model_path": "./models/bge_ov_reranker",
"device": "auto"
}
}
@@ -31,7 +29,6 @@
"generator": {
"model": {
"model_id": "Qwen/Qwen2-7B-Instruct",
- "model_path": "./models/qwen2-7b-instruct/INT4_compressed_weights",
"device": "cpu"
},
"prompt_path": "./edgecraftrag/prompt_template/default_prompt.txt",
diff --git a/EdgeCraftRAG/ui/docker/Dockerfile.ui b/EdgeCraftRAG/ui/docker/Dockerfile.ui
index 46a14a6e9..3dacb35d8 100644
--- a/EdgeCraftRAG/ui/docker/Dockerfile.ui
+++ b/EdgeCraftRAG/ui/docker/Dockerfile.ui
@@ -11,10 +11,11 @@ RUN useradd -m -s /bin/bash user && \
COPY ./ui/gradio /home/user/ui
COPY ./edgecraftrag /home/user/edgecraftrag
-WORKDIR /home/user/edgecraftrag
-RUN pip install --no-cache-dir -r requirements.txt
+RUN mkdir -p /home/user/gradio_cache
+ENV GRADIO_TEMP_DIR=/home/user/gradio_cache
WORKDIR /home/user/ui
+RUN pip install --no-cache-dir -r requirements.txt
USER user
diff --git a/EdgeCraftRAG/ui/gradio/default.yaml b/EdgeCraftRAG/ui/gradio/default.yaml
index 1421da8f4..39c3ee92e 100644
--- a/EdgeCraftRAG/ui/gradio/default.yaml
+++ b/EdgeCraftRAG/ui/gradio/default.yaml
@@ -3,7 +3,6 @@
# Model language for LLM
model_language: "Chinese"
-vector_db: "FAISS"
splitter_name: "RecursiveCharacter"
k_rerank: 5
search_method: "similarity"
@@ -29,21 +28,19 @@ k_retrieval: 30
postprocessor: "reranker"
# Generator
-generator: "local"
-prompt_path: "./data/default_prompt.txt"
+generator: "chatqna"
+prompt_path: "./edgecraftrag/prompt_template/default_prompt.txt"
# Models
embedding_model_id: "BAAI/bge-small-en-v1.5"
-embedding_model_path: "./bge_ov_embedding"
# Device for embedding model inference
embedding_device: "AUTO"
rerank_model_id: "BAAI/bge-reranker-large"
-rerank_model_path: "./bge_ov_reranker"
# Device for reranking model inference
rerank_device: "AUTO"
-llm_model_id: "qwen2-7b-instruct"
-llm_model_path: "./qwen2-7b-instruct/INT4_compressed_weights"
+llm_model_id: "Qwen/Qwen2-7B-Instruct"
+llm_weights: "INT4"
# Device for LLM model inference
llm_device: "AUTO"
diff --git a/EdgeCraftRAG/ui/gradio/ecrag_client.py b/EdgeCraftRAG/ui/gradio/ecrag_client.py
index 47b5f776d..6593cbd94 100644
--- a/EdgeCraftRAG/ui/gradio/ecrag_client.py
+++ b/EdgeCraftRAG/ui/gradio/ecrag_client.py
@@ -1,13 +1,13 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
+import os
import sys
+import platform_config as pconf
import requests
sys.path.append("..")
-import os
-
from edgecraftrag import api_schema
PIPELINE_SERVICE_HOST_IP = os.getenv("PIPELINE_SERVICE_HOST_IP", "127.0.0.1")
@@ -42,6 +42,7 @@ def create_update_pipeline(
vector_search_top_k,
postprocessor,
generator,
+ llm_infertype,
llm_id,
llm_device,
llm_weights,
@@ -50,6 +51,7 @@ def create_update_pipeline(
rerank_id,
rerank_device,
):
+ llm_path = pconf.get_llm_model_dir("./models/", llm_id, llm_weights)
req_dict = api_schema.PipelineCreateIn(
name=name,
active=active,
@@ -60,9 +62,9 @@ def create_update_pipeline(
indexer_type=indexer,
embedding_model=api_schema.ModelIn(
model_id=embedding_id,
- # TODO: remove hardcoding
- model_path="./bge_ov_embedding",
+ model_path="./models/" + embedding_id,
device=embedding_device,
+ weight=llm_weights,
),
),
retriever=api_schema.RetrieverIn(retriever_type=retriever, retriever_topk=vector_search_top_k),
@@ -70,22 +72,15 @@ def create_update_pipeline(
api_schema.PostProcessorIn(
processor_type=postprocessor[0],
reranker_model=api_schema.ModelIn(
- model_id=rerank_id,
- # TODO: remove hardcoding
- model_path="./bge_ov_reranker",
- device=rerank_device,
+ model_id=rerank_id, model_path="./models/" + rerank_id, device=rerank_device, weight=llm_weights
),
)
],
generator=api_schema.GeneratorIn(
# TODO: remove hardcoding
prompt_path="./edgecraftrag/prompt_template/default_prompt.txt",
- model=api_schema.ModelIn(
- model_id=llm_id,
- # TODO: remove hardcoding
- model_path="./models/qwen2-7b-instruct/INT4_compressed_weights",
- device=llm_device,
- ),
+ model=api_schema.ModelIn(model_id=llm_id, model_path=llm_path, device=llm_device, weight=llm_weights),
+ inference_type=llm_infertype,
),
)
# hard code only for test
@@ -105,7 +100,7 @@ def activate_pipeline(name):
return restext, status
-def create_vectordb(docs, spliter, vector_db):
+def create_vectordb(docs, spliter):
req_dict = api_schema.FilesIn(local_paths=docs)
res = requests.post(f"{server_addr}/v1/data/files", json=req_dict.dict(), proxies={"http": None})
return res.text
@@ -116,6 +111,8 @@ def get_files():
files = []
for file in res.json():
files.append((file["file_name"], file["file_id"]))
+ if not files:
+ files.append((None, None))
return files
diff --git a/EdgeCraftRAG/ui/gradio/ecragui.py b/EdgeCraftRAG/ui/gradio/ecragui.py
index 3c198bf2a..23a5286de 100644
--- a/EdgeCraftRAG/ui/gradio/ecragui.py
+++ b/EdgeCraftRAG/ui/gradio/ecragui.py
@@ -2,11 +2,9 @@
# SPDX-License-Identifier: Apache-2.0
import argparse
-import json
+import os
import platform
-import re
from datetime import datetime
-from pathlib import Path
import cpuinfo
import distro # if running Python 3.8 or above
@@ -17,41 +15,22 @@
# Creation of the ModelLoader instance and loading models remain the same
import platform_config as pconf
import psutil
-import requests
from loguru import logger
from omegaconf import OmegaConf
-from platform_config import get_available_devices, get_available_weights, get_local_available_models
+from platform_config import (
+ get_avail_llm_inference_type,
+ get_available_devices,
+ get_available_weights,
+ get_local_available_models,
+)
pipeline_df = []
-import os
MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "127.0.0.1")
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 16011))
UI_SERVICE_HOST_IP = os.getenv("UI_SERVICE_HOST_IP", "0.0.0.0")
-UI_SERVICE_PORT = int(os.getenv("UI_SERVICE_PORT", 8084))
-
-
-def get_llm_model_dir(llm_model_id, weights_compression):
- model_dirs = {
- "fp16_model_dir": Path(llm_model_id) / "FP16",
- "int8_model_dir": Path(llm_model_id) / "INT8_compressed_weights",
- "int4_model_dir": Path(llm_model_id) / "INT4_compressed_weights",
- }
-
- if weights_compression == "INT4":
- model_dir = model_dirs["int4_model_dir"]
- elif weights_compression == "INT8":
- model_dir = model_dirs["int8_model_dir"]
- else:
- model_dir = model_dirs["fp16_model_dir"]
-
- if not model_dir.exists():
- raise FileNotFoundError(f"The model directory {model_dir} does not exist.")
- elif not model_dir.is_dir():
- raise NotADirectoryError(f"The path {model_dir} is not a directory.")
-
- return model_dir
+UI_SERVICE_PORT = int(os.getenv("UI_SERVICE_PORT", 8082))
def get_system_status():
@@ -87,31 +66,7 @@ def get_system_status():
return status
-def build_demo(cfg, args):
-
- def load_chatbot_models(
- llm_id,
- llm_device,
- llm_weights,
- embedding_id,
- embedding_device,
- rerank_id,
- rerank_device,
- ):
- req_dict = {
- "llm_id": llm_id,
- "llm_device": llm_device,
- "llm_weights": llm_weights,
- "embedding_id": embedding_id,
- "embedding_device": embedding_device,
- "rerank_id": rerank_id,
- "rerank_device": rerank_device,
- }
- # hard code only for test
- worker_addr = "http://127.0.0.1:8084"
- print(req_dict)
- result = requests.post(f"{worker_addr}/load", json=req_dict, proxies={"http": None})
- return result.text
+def build_app(cfg, args):
def user(message, history):
"""Callback function for updating user messages in interface on submit button click.
@@ -131,11 +86,9 @@ async def bot(
top_p,
top_k,
repetition_penalty,
+ max_tokens,
hide_full_prompt,
- do_rag,
docs,
- spliter_name,
- vector_db,
chunk_size,
chunk_overlap,
vector_search_top_k,
@@ -155,41 +108,16 @@ async def bot(
repetition_penalty: parameter for penalizing tokens based on how frequently they occur in the text.
conversation_id: unique conversation identifier.
"""
- # req_dict = {
- # "history": history,
- # "temperature": temperature,
- # "top_p": top_p,
- # "top_k": top_k,
- # "repetition_penalty": repetition_penalty,
- # "hide_full_prompt": hide_full_prompt,
- # "do_rag": do_rag,
- # "docs": docs,
- # "spliter_name": spliter_name,
- # "vector_db": vector_db,
- # "chunk_size": chunk_size,
- # "chunk_overlap": chunk_overlap,
- # "vector_search_top_k": vector_search_top_k,
- # "vector_search_top_n": vector_search_top_n,
- # "run_rerank": run_rerank,
- # "search_method": search_method,
- # "score_threshold": score_threshold,
- # "streaming": True
- # }
- print(history)
- new_req = {"messages": history[-1][0]}
+ stream_opt = True
+ new_req = {"messages": history[-1][0], "stream": stream_opt, "max_tokens": max_tokens}
server_addr = f"http://{MEGA_SERVICE_HOST_IP}:{MEGA_SERVICE_PORT}"
# Async for streaming response
partial_text = ""
async with httpx.AsyncClient() as client:
async with client.stream("POST", f"{server_addr}/v1/chatqna", json=new_req, timeout=None) as response:
- partial_text = ""
- async for chunk in response.aiter_lines():
- new_text = chunk
- if new_text.startswith("data"):
- new_text = re.sub(r"\r\n", "", chunk.split("data: ")[-1])
- new_text = json.loads(chunk)["choices"][0]["message"]["content"]
- partial_text = partial_text + new_text
+ async for chunk in response.aiter_text():
+ partial_text = partial_text + chunk
history[-1][1] = partial_text
yield history
@@ -198,6 +126,7 @@ async def bot(
avail_rerank_models = get_local_available_models("rerank")
avail_devices = get_available_devices()
avail_weights_compression = get_available_weights()
+ avail_llm_inference_type = get_avail_llm_inference_type()
avail_node_parsers = pconf.get_available_node_parsers()
avail_indexers = pconf.get_available_indexers()
avail_retrievers = pconf.get_available_retrievers()
@@ -212,7 +141,7 @@ async def bot(
.disclaimer {font-variant-caps: all-small-caps}
"""
- with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
+ with gr.Blocks(theme=gr.themes.Soft(), css=css) as app:
gr.HTML(
"""
@@ -250,7 +179,7 @@ async def bot(