Refactor FaqGen (#1093)
Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
XinyaoWa and pre-commit-ci[bot] authored Jan 13, 2025
1 parent 3f23bf5 commit ea72c94
Showing 31 changed files with 962 additions and 551 deletions.
10 changes: 3 additions & 7 deletions .github/workflows/docker/compose/llms-compose.yaml
@@ -15,10 +15,10 @@ services:
     build:
       dockerfile: comps/llms/summarization/tgi/langchain/Dockerfile
     image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}
-  llm-faqgen-tgi:
+  llm-faqgen:
     build:
-      dockerfile: comps/llms/faq-generation/tgi/langchain/Dockerfile
-    image: ${REGISTRY:-opea}/llm-faqgen-tgi:${TAG:-latest}
+      dockerfile: comps/llms/src/faq-generation/Dockerfile
+    image: ${REGISTRY:-opea}/llm-faqgen:${TAG:-latest}
   llm-native:
     build:
       dockerfile: comps/llms/text-generation/native/langchain/Dockerfile
@@ -54,7 +54,3 @@ services:
     build:
       dockerfile: comps/llms/summarization/vllm/langchain/Dockerfile
     image: ${REGISTRY:-opea}/llm-docsum-vllm:${TAG:-latest}
-  llm-faqgen-vllm:
-    build:
-      dockerfile: comps/llms/faq-generation/vllm/langchain/Dockerfile
-    image: ${REGISTRY:-opea}/llm-faqgen-vllm:${TAG:-latest}
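Note: the rename means downstream consumers now build and pull a single opea/llm-faqgen image instead of the per-backend opea/llm-faqgen-tgi and opea/llm-faqgen-vllm images. A minimal local build matching the new entry might look like the sketch below; the repository-root build context is an assumption, not something shown in this diff.

# Build the consolidated FaqGen image from its new Dockerfile location
# (build context assumed to be the GenAIComps repository root).
docker build \
  -t ${REGISTRY:-opea}/llm-faqgen:${TAG:-latest} \
  -f comps/llms/src/faq-generation/Dockerfile .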
50 changes: 50 additions & 0 deletions comps/llms/deployment/docker_compose/faq-generation_tgi.yaml
@@ -0,0 +1,50 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "3.8"

services:
  tgi-service:
    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
    container_name: tgi-server
    ports:
      - ${LLM_ENDPOINT_PORT:-8008}:80
    volumes:
      - "./data:/data"
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      host_ip: ${host_ip}
      LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
      interval: 10s
      timeout: 10s
      retries: 100
    command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
  llm:
    image: opea/llm-faqgen:latest
    container_name: llm-faqgen-server
    depends_on:
      tgi-service:
        condition: service_healthy
    ports:
      - ${FAQ_PORT:-9000}:9000
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      LLM_ENDPOINT: ${LLM_ENDPOINT}
      LLM_MODEL_ID: ${LLM_MODEL_ID}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      FAQGen_COMPONENT_NAME: ${FAQGen_COMPONENT_NAME}
      LOGFLAG: ${LOGFLAG:-False}
    restart: unless-stopped

networks:
  default:
    driver: bridge
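For local testing, the variables referenced in this file have to be exported before docker compose is invoked. The sketch below is illustrative only: the model ID and the FAQGen_COMPONENT_NAME value are assumptions, not values defined in this commit.

# Illustrative environment for faq-generation_tgi.yaml; model and component name are assumed.
export host_ip=$(hostname -I | awk '{print $1}')
export LLM_ENDPOINT_PORT=8008
export FAQ_PORT=9000
export HUGGINGFACEHUB_API_TOKEN=<your-hf-token>
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"              # example model, assumed
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export FAQGen_COMPONENT_NAME="OpeaFaqGenTgi"                 # hypothetical value; check the component README
docker compose -f comps/llms/deployment/docker_compose/faq-generation_tgi.yaml up -d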
@@ -0,0 +1,61 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "3.8"

services:
  tgi-service:
    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
    container_name: tgi-gaudi-server
    ports:
      - ${LLM_ENDPOINT_PORT:-8008}:80
    volumes:
      - "./data:/data"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      ENABLE_HPU_GRAPH: true
      LIMIT_HPU_GRAPH: true
      USE_FLASH_ATTENTION: true
      FLASH_ATTENTION_RECOMPUTE: true
      host_ip: ${host_ip}
      LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
      interval: 10s
      timeout: 10s
      retries: 100
    command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
  llm:
    image: opea/llm-faqgen:latest
    container_name: llm-faqgen-server
    depends_on:
      tgi-service:
        condition: service_healthy
    ports:
      - ${FAQ_PORT:-9000}:9000
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      LLM_ENDPOINT: ${LLM_ENDPOINT}
      LLM_MODEL_ID: ${LLM_MODEL_ID}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      FAQGen_COMPONENT_NAME: ${FAQGen_COMPONENT_NAME}
      LOGFLAG: ${LOGFLAG:-False}
    restart: unless-stopped

networks:
  default:
    driver: bridge
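Because the llm container only starts once the Gaudi TGI healthcheck passes, it can help to watch the same probe while the model downloads and warms up. A minimal check, assuming the variables above are already exported:

# Same probe the compose healthcheck runs; fails until TGI on Gaudi is ready.
curl -f "http://${host_ip}:${LLM_ENDPOINT_PORT}/health" && echo "tgi-gaudi-server is ready"
# If the probe keeps failing, inspect the model download / warmup logs.
docker logs -f tgi-gaudi-server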
53 changes: 53 additions & 0 deletions comps/llms/deployment/docker_compose/faq-generation_vllm.yaml
@@ -0,0 +1,53 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "3.8"

services:
  vllm-service:
    image: opea/vllm:latest
    container_name: vllm-server
    ports:
      - ${LLM_ENDPOINT_PORT:-8008}:80
    volumes:
      - "./data:/data"
    shm_size: 128g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      LLM_MODEL_ID: ${LLM_MODEL_ID}
      VLLM_TORCH_PROFILER_DIR: "/mnt"
      host_ip: ${host_ip}
      LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
      VLLM_SKIP_WARMUP: ${VLLM_SKIP_WARMUP:-false}
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
      interval: 10s
      timeout: 10s
      retries: 100
    command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80
  llm:
    image: opea/llm-faqgen:latest
    container_name: llm-faqgen-server
    depends_on:
      vllm-service:
        condition: service_healthy
    ports:
      - ${FAQ_PORT:-9000}:9000
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      LLM_ENDPOINT: ${LLM_ENDPOINT}
      LLM_MODEL_ID: ${LLM_MODEL_ID}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      FAQGen_COMPONENT_NAME: ${FAQGen_COMPONENT_NAME}
      LOGFLAG: ${LOGFLAG:-False}
    restart: unless-stopped

networks:
  default:
    driver: bridge
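Once this stack is up, the vLLM backend can be smoke-tested independently of the FaqGen wrapper. The request below assumes the opea/vllm image serves vLLM's OpenAI-compatible API on port 80, as the --host/--port arguments suggest; it is a sketch, not an interface defined by this commit.

# Probe the vLLM backend directly (OpenAI-compatible completions route assumed).
curl "http://${host_ip}:${LLM_ENDPOINT_PORT}/v1/completions" \
  -H "Content-Type: application/json" \
  -d "{\"model\": \"${LLM_MODEL_ID}\", \"prompt\": \"What is OPEA?\", \"max_tokens\": 32}"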
@@ -8,37 +8,49 @@ services:
     image: opea/vllm-gaudi:latest
     container_name: vllm-gaudi-server
     ports:
-      - "8008:80"
+      - ${LLM_ENDPOINT_PORT:-8008}:80
     volumes:
       - "./data:/data"
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      HF_TOKEN: ${HF_TOKEN}
+      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       HABANA_VISIBLE_DEVICES: all
       OMPI_MCA_btl_vader_single_copy_mechanism: none
       LLM_MODEL_ID: ${LLM_MODEL_ID}
       VLLM_TORCH_PROFILER_DIR: "/mnt"
+      host_ip: ${host_ip}
+      LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
+      VLLM_SKIP_WARMUP: ${VLLM_SKIP_WARMUP:-false}
     runtime: habana
     cap_add:
       - SYS_NICE
     ipc: host
-    command: --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
+    command: --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
   llm:
-    image: opea/llm-faqgen-vllm:latest
+    image: opea/llm-faqgen:latest
     container_name: llm-faqgen-server
     depends_on:
-      - vllm-service
+      vllm-service:
+        condition: service_healthy
     ports:
-      - "9000:9000"
+      - ${FAQ_PORT:-9000}:9000
     ipc: host
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      vLLM_ENDPOINT: ${vLLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
+      LLM_ENDPOINT: ${LLM_ENDPOINT}
+      LLM_MODEL_ID: ${LLM_MODEL_ID}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      FAQGen_COMPONENT_NAME: ${FAQGen_COMPONENT_NAME}
+      LOGFLAG: ${LOGFLAG:-False}
     restart: unless-stopped
 
 networks:
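After the refactor, every variant exposes the same llm-faqgen-server container on ${FAQ_PORT:-9000}. A hedged request sketch follows; the /v1/faqgen route and payload shape are assumptions based on the FaqGen microservice's conventions and are not part of this diff.

# Assumed endpoint and payload for the refactored FaqGen microservice;
# verify the actual route in comps/llms/src/faq-generation before relying on it.
curl -X POST "http://${host_ip}:${FAQ_PORT:-9000}/v1/faqgen" \
  -H "Content-Type: application/json" \
  -d '{"messages": "Text Embeddings Inference (TEI) is a toolkit for serving open source text embeddings models.", "max_tokens": 128}'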
75 changes: 0 additions & 75 deletions comps/llms/faq-generation/tgi/langchain/README.md

This file was deleted.

34 changes: 0 additions & 34 deletions comps/llms/faq-generation/tgi/langchain/docker_compose_llm.yaml

This file was deleted.

8 changes: 0 additions & 8 deletions comps/llms/faq-generation/tgi/langchain/entrypoint.sh

This file was deleted.
