Refactor llm Docsum (opea-project#1101)
Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
XinyaoWa authored and smguggen committed Jan 23, 2025
1 parent 484bd22 commit e7ee87e
Showing 29 changed files with 1,195 additions and 961 deletions.
8 changes: 2 additions & 6 deletions .github/workflows/docker/compose/llms-compose.yaml
@@ -11,9 +11,9 @@ services:
     build:
       dockerfile: comps/llms/text-generation/ollama/langchain/Dockerfile
     image: ${REGISTRY:-opea}/llm-ollama:${TAG:-latest}
-  llm-docsum-tgi:
+  llm-docsum:
     build:
-      dockerfile: comps/llms/summarization/tgi/langchain/Dockerfile
+      dockerfile: comps/llms/src/doc-summarization/Dockerfile
     image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}
   llm-faqgen:
     build:
@@ -50,7 +50,3 @@ services:
     build:
       dockerfile: comps/llms/text-generation/predictionguard/Dockerfile
     image: ${REGISTRY:-opea}/llm-textgen-predictionguard:${TAG:-latest}
-  llm-docsum-vllm:
-    build:
-      dockerfile: comps/llms/summarization/vllm/langchain/Dockerfile
-    image: ${REGISTRY:-opea}/llm-docsum-vllm:${TAG:-latest}
52 changes: 52 additions & 0 deletions comps/llms/deployment/docker_compose/doc-summarization_tgi.yaml
@@ -0,0 +1,52 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "3.8"

services:
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-server
ports:
- ${LLM_ENDPOINT_PORT:-8008}:80
volumes:
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
host_ip: ${host_ip}
LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
healthcheck:
test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
interval: 10s
timeout: 10s
retries: 100
command: --model-id ${LLM_MODEL_ID} --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS}
llm:
image: opea/llm-docsum:latest
container_name: llm-docsum-server
depends_on:
tgi-service:
condition: service_healthy
ports:
- ${DOCSUM_PORT:-9000}:9000
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
LLM_ENDPOINT: ${LLM_ENDPOINT}
LLM_MODEL_ID: ${LLM_MODEL_ID}
DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME}
LOGFLAG: ${LOGFLAG:-False}
restart: unless-stopped

networks:
default:
driver: bridge
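
For context, a minimal sketch of bringing this compose file up. The model ID, token limits, and DocSum_COMPONENT_NAME value below are placeholder assumptions for illustration, not values mandated by the file:

# Hypothetical environment values; the compose file only requires that these variables are set.
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"        # example model, swap for any TGI-supported model
export HUGGINGFACEHUB_API_TOKEN="<your HF token>"
export MAX_INPUT_TOKENS=1024
export MAX_TOTAL_TOKENS=2048
export host_ip=$(hostname -I | awk '{print $1}')
export LLM_ENDPOINT_PORT=8008
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export DocSum_COMPONENT_NAME="OpeaDocSumTgi"           # assumed component name; check the service README
export DOCSUM_PORT=9000

docker compose -f comps/llms/deployment/docker_compose/doc-summarization_tgi.yaml up -d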
@@ -0,0 +1,63 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "3.8"

services:
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.3.1
container_name: tgi_gaudi_server
ports:
- ${LLM_ENDPOINT_PORT:-8008}:80
volumes:
- "./data:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
host_ip: ${host_ip}
LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
runtime: habana
cap_add:
- SYS_NICE
ipc: host
healthcheck:
test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
interval: 10s
timeout: 10s
retries: 100
command: --model-id ${LLM_MODEL_ID} --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS}
llm:
image: opea/llm-docsum:latest
container_name: llm-docsum-server
depends_on:
tgi-service:
condition: service_healthy
ports:
- ${DOCSUM_PORT:-9000}:9000
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
LLM_ENDPOINT: ${LLM_ENDPOINT}
LLM_MODEL_ID: ${LLM_MODEL_ID}
DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME}
LOGFLAG: ${LOGFLAG:-False}
restart: unless-stopped

networks:
default:
driver: bridge
55 changes: 55 additions & 0 deletions comps/llms/deployment/docker_compose/doc-summarization_vllm.yaml
@@ -0,0 +1,55 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "3.8"

services:
vllm-service:
image: opea/vllm:latest
container_name: vllm-server
ports:
- ${LLM_ENDPOINT_PORT:-8008}:80
volumes:
- "./data:/data"
shm_size: 128g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LLM_MODEL_ID: ${LLM_MODEL_ID}
VLLM_TORCH_PROFILER_DIR: "/mnt"
host_ip: ${host_ip}
LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
VLLM_SKIP_WARMUP: ${VLLM_SKIP_WARMUP:-false}
healthcheck:
test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
interval: 10s
timeout: 10s
retries: 100
command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80
llm:
image: opea/llm-docsum:latest
container_name: llm-docsum-server
depends_on:
vllm-service:
condition: service_healthy
ports:
- ${DOCSUM_PORT:-9000}:9000
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
LLM_ENDPOINT: ${LLM_ENDPOINT}
LLM_MODEL_ID: ${LLM_MODEL_ID}
DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME}
LOGFLAG: ${LOGFLAG:-False}
restart: unless-stopped

networks:
default:
driver: bridge
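
Once the stack reports healthy, a hedged smoke test of the DocSum microservice might look like the following; the /v1/docsum route and the "messages" payload field are assumptions based on other OPEA DocSum examples, so adjust them to the component's actual API:

# Hypothetical request; route and payload shape are assumptions, not confirmed by this diff.
curl -X POST http://${host_ip}:${DOCSUM_PORT:-9000}/v1/docsum \
  -H "Content-Type: application/json" \
  -d '{"messages": "Text Embeddings Inference (TEI) is a toolkit for serving open-source text embedding models.", "max_tokens": 32, "language": "en"}'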
@@ -8,37 +8,52 @@ services:
image: opea/vllm-gaudi:latest
container_name: vllm-gaudi-server
ports:
- "8008:80"
- ${LLM_ENDPOINT_PORT:-8008}:80
volumes:
- "./data:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
LLM_MODEL_ID: ${LLM_MODEL_ID}
MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
VLLM_TORCH_PROFILER_DIR: "/mnt"
host_ip: ${host_ip}
LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
VLLM_SKIP_WARMUP: ${VLLM_SKIP_WARMUP:-false}
runtime: habana
cap_add:
- SYS_NICE
ipc: host
command: --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80
healthcheck:
test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
interval: 10s
timeout: 10s
retries: 100
command: --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq-len-to-capture ${MAX_INPUT_TOKENS}
llm:
image: opea/llm-docsum-vllm:latest
container_name: llm-docsum-vllm-server
image: opea/llm-docsum:latest
container_name: llm-docsum-server
depends_on:
vllm-service:
condition: service_healthy
ports:
- "9000:9000"
- ${DOCSUM_PORT:-9000}:9000
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
vLLM_ENDPOINT: ${vLLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
LLM_MODEL_ID: ${LLM_MODEL_ID}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
LLM_ENDPOINT: ${LLM_ENDPOINT}
LLM_MODEL_ID: ${LLM_MODEL_ID}
DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME}
LOGFLAG: ${LOGFLAG:-False}
restart: unless-stopped

networks:
@@ -19,10 +19,10 @@ COPY comps /home/user/comps

 RUN pip install --no-cache-dir --upgrade pip setuptools && \
     if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \
-    pip install --no-cache-dir -r /home/user/comps/llms/summarization/tgi/langchain/requirements.txt
+    pip install --no-cache-dir -r /home/user/comps/llms/src/doc-summarization/requirements.txt

 ENV PYTHONPATH=$PYTHONPATH:/home/user

-WORKDIR /home/user/comps/llms/summarization/tgi/langchain
+WORKDIR /home/user/comps/llms/src/doc-summarization

 ENTRYPOINT ["bash", "entrypoint.sh"]
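
For reference, a sketch of building the refactored image from the repository root so the compose files above can resolve it; the tag simply mirrors the opea/llm-docsum:latest image name they reference:

# Assumes the GenAIComps repository root as the build context.
docker build \
  -f comps/llms/src/doc-summarization/Dockerfile \
  -t opea/llm-docsum:latest .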