Refactor FaqGen (#1093)
Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
XinyaoWa and pre-commit-ci[bot] authored Jan 13, 2025
1 parent 3f23bf5 commit ea72c94
Showing 31 changed files with 962 additions and 551 deletions.
10 changes: 3 additions & 7 deletions .github/workflows/docker/compose/llms-compose.yaml
@@ -15,10 +15,10 @@ services:
     build:
       dockerfile: comps/llms/summarization/tgi/langchain/Dockerfile
     image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}
-  llm-faqgen-tgi:
+  llm-faqgen:
     build:
-      dockerfile: comps/llms/faq-generation/tgi/langchain/Dockerfile
-    image: ${REGISTRY:-opea}/llm-faqgen-tgi:${TAG:-latest}
+      dockerfile: comps/llms/src/faq-generation/Dockerfile
+    image: ${REGISTRY:-opea}/llm-faqgen:${TAG:-latest}
   llm-native:
     build:
       dockerfile: comps/llms/text-generation/native/langchain/Dockerfile
@@ -54,7 +54,3 @@ services:
     build:
       dockerfile: comps/llms/summarization/vllm/langchain/Dockerfile
     image: ${REGISTRY:-opea}/llm-docsum-vllm:${TAG:-latest}
-  llm-faqgen-vllm:
-    build:
-      dockerfile: comps/llms/faq-generation/vllm/langchain/Dockerfile
-    image: ${REGISTRY:-opea}/llm-faqgen-vllm:${TAG:-latest}
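Note: the rename means downstream consumers now build and pull a single opea/llm-faqgen image instead of the per-backend opea/llm-faqgen-tgi and opea/llm-faqgen-vllm images. A minimal local build matching the new entry might look like the sketch below; the repository-root build context is an assumption, not something shown in this diff.

# Build the consolidated FaqGen image from its new Dockerfile location
# (build context assumed to be the GenAIComps repository root).
docker build \
  -t ${REGISTRY:-opea}/llm-faqgen:${TAG:-latest} \
  -f comps/llms/src/faq-generation/Dockerfile .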
50 changes: 50 additions & 0 deletions comps/llms/deployment/docker_compose/faq-generation_tgi.yaml
@@ -0,0 +1,50 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "3.8"

services:
  tgi-service:
    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
    container_name: tgi-server
    ports:
      - ${LLM_ENDPOINT_PORT:-8008}:80
    volumes:
      - "./data:/data"
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      host_ip: ${host_ip}
      LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
      interval: 10s
      timeout: 10s
      retries: 100
    command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
  llm:
    image: opea/llm-faqgen:latest
    container_name: llm-faqgen-server
    depends_on:
      tgi-service:
        condition: service_healthy
    ports:
      - ${FAQ_PORT:-9000}:9000
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      LLM_ENDPOINT: ${LLM_ENDPOINT}
      LLM_MODEL_ID: ${LLM_MODEL_ID}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      FAQGen_COMPONENT_NAME: ${FAQGen_COMPONENT_NAME}
      LOGFLAG: ${LOGFLAG:-False}
    restart: unless-stopped

networks:
  default:
    driver: bridge
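For local testing, the variables referenced in this file have to be exported before docker compose is invoked. The sketch below is illustrative only: the model ID and the FAQGen_COMPONENT_NAME value are assumptions, not values defined in this commit.

# Illustrative environment for faq-generation_tgi.yaml; model and component name are assumed.
export host_ip=$(hostname -I | awk '{print $1}')
export LLM_ENDPOINT_PORT=8008
export FAQ_PORT=9000
export HUGGINGFACEHUB_API_TOKEN=<your-hf-token>
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"              # example model, assumed
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export FAQGen_COMPONENT_NAME="OpeaFaqGenTgi"                 # hypothetical value; check the component README
docker compose -f comps/llms/deployment/docker_compose/faq-generation_tgi.yaml up -d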
@@ -0,0 +1,61 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "3.8"

services:
  tgi-service:
    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
    container_name: tgi-gaudi-server
    ports:
      - ${LLM_ENDPOINT_PORT:-8008}:80
    volumes:
      - "./data:/data"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      ENABLE_HPU_GRAPH: true
      LIMIT_HPU_GRAPH: true
      USE_FLASH_ATTENTION: true
      FLASH_ATTENTION_RECOMPUTE: true
      host_ip: ${host_ip}
      LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
      interval: 10s
      timeout: 10s
      retries: 100
    command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
  llm:
    image: opea/llm-faqgen:latest
    container_name: llm-faqgen-server
    depends_on:
      tgi-service:
        condition: service_healthy
    ports:
      - ${FAQ_PORT:-9000}:9000
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      LLM_ENDPOINT: ${LLM_ENDPOINT}
      LLM_MODEL_ID: ${LLM_MODEL_ID}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      FAQGen_COMPONENT_NAME: ${FAQGen_COMPONENT_NAME}
      LOGFLAG: ${LOGFLAG:-False}
    restart: unless-stopped

networks:
  default:
    driver: bridge
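Because the llm container only starts once the Gaudi TGI healthcheck passes, it can help to watch the same probe while the model downloads and warms up. A minimal check, assuming the variables above are already exported:

# Same probe the compose healthcheck runs; fails until TGI on Gaudi is ready.
curl -f "http://${host_ip}:${LLM_ENDPOINT_PORT}/health" && echo "tgi-gaudi-server is ready"
# If the probe keeps failing, inspect the model download / warmup logs.
docker logs -f tgi-gaudi-server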
53 changes: 53 additions & 0 deletions comps/llms/deployment/docker_compose/faq-generation_vllm.yaml
@@ -0,0 +1,53 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "3.8"

services:
  vllm-service:
    image: opea/vllm:latest
    container_name: vllm-server
    ports:
      - ${LLM_ENDPOINT_PORT:-8008}:80
    volumes:
      - "./data:/data"
    shm_size: 128g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      LLM_MODEL_ID: ${LLM_MODEL_ID}
      VLLM_TORCH_PROFILER_DIR: "/mnt"
      host_ip: ${host_ip}
      LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
      VLLM_SKIP_WARMUP: ${VLLM_SKIP_WARMUP:-false}
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
      interval: 10s
      timeout: 10s
      retries: 100
    command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80
  llm:
    image: opea/llm-faqgen:latest
    container_name: llm-faqgen-server
    depends_on:
      vllm-service:
        condition: service_healthy
    ports:
      - ${FAQ_PORT:-9000}:9000
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      LLM_ENDPOINT: ${LLM_ENDPOINT}
      LLM_MODEL_ID: ${LLM_MODEL_ID}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      FAQGen_COMPONENT_NAME: ${FAQGen_COMPONENT_NAME}
      LOGFLAG: ${LOGFLAG:-False}
    restart: unless-stopped

networks:
  default:
    driver: bridge
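Once this stack is up, the vLLM backend can be smoke-tested independently of the FaqGen wrapper. The request below assumes the opea/vllm image serves vLLM's OpenAI-compatible API on port 80, as the --host/--port arguments suggest; it is a sketch, not an interface defined by this commit.

# Probe the vLLM backend directly (OpenAI-compatible completions route assumed).
curl "http://${host_ip}:${LLM_ENDPOINT_PORT}/v1/completions" \
  -H "Content-Type: application/json" \
  -d "{\"model\": \"${LLM_MODEL_ID}\", \"prompt\": \"What is OPEA?\", \"max_tokens\": 32}"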
@@ -8,37 +8,49 @@ services:
     image: opea/vllm-gaudi:latest
     container_name: vllm-gaudi-server
     ports:
-      - "8008:80"
+      - ${LLM_ENDPOINT_PORT:-8008}:80
     volumes:
       - "./data:/data"
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      HF_TOKEN: ${HF_TOKEN}
+      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       HABANA_VISIBLE_DEVICES: all
       OMPI_MCA_btl_vader_single_copy_mechanism: none
       LLM_MODEL_ID: ${LLM_MODEL_ID}
       VLLM_TORCH_PROFILER_DIR: "/mnt"
+      host_ip: ${host_ip}
+      LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
+      VLLM_SKIP_WARMUP: ${VLLM_SKIP_WARMUP:-false}
     runtime: habana
     cap_add:
       - SYS_NICE
     ipc: host
-    command: --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
+    command: --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
   llm:
-    image: opea/llm-faqgen-vllm:latest
+    image: opea/llm-faqgen:latest
     container_name: llm-faqgen-server
     depends_on:
-      - vllm-service
+      vllm-service:
+        condition: service_healthy
     ports:
-      - "9000:9000"
+      - ${FAQ_PORT:-9000}:9000
     ipc: host
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      vLLM_ENDPOINT: ${vLLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
+      LLM_ENDPOINT: ${LLM_ENDPOINT}
+      LLM_MODEL_ID: ${LLM_MODEL_ID}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      FAQGen_COMPONENT_NAME: ${FAQGen_COMPONENT_NAME}
+      LOGFLAG: ${LOGFLAG:-False}
     restart: unless-stopped
 
 networks:
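After the refactor, every variant exposes the same llm-faqgen-server container on ${FAQ_PORT:-9000}. A hedged request sketch follows; the /v1/faqgen route and payload shape are assumptions based on the FaqGen microservice's conventions and are not part of this diff.

# Assumed endpoint and payload for the refactored FaqGen microservice;
# verify the actual route in comps/llms/src/faq-generation before relying on it.
curl -X POST "http://${host_ip}:${FAQ_PORT:-9000}/v1/faqgen" \
  -H "Content-Type: application/json" \
  -d '{"messages": "Text Embeddings Inference (TEI) is a toolkit for serving open source text embeddings models.", "max_tokens": 128}'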
75 changes: 0 additions & 75 deletions comps/llms/faq-generation/tgi/langchain/README.md

This file was deleted.

34 changes: 0 additions & 34 deletions comps/llms/faq-generation/tgi/langchain/docker_compose_llm.yaml

This file was deleted.

8 changes: 0 additions & 8 deletions comps/llms/faq-generation/tgi/langchain/entrypoint.sh

This file was deleted.
