Refactor llm Docsum #1101

Merged: 33 commits, Jan 13, 2025
Commits
937a908  refactor docsum (XinyaoWa, Dec 31, 2024)
f15fdf9  vllm input (XinyaoWa, Dec 31, 2024)
2251a0f  refine ut for docsum (XinyaoWa, Jan 2, 2025)
e3a8891  fix docker path for docsum (XinyaoWa, Jan 2, 2025)
79e3404  for ut, duplicate with faqgen pr, can be removed later (XinyaoWa, Jan 2, 2025)
aa6fbc0  fix bug (XinyaoWa, Jan 2, 2025)
aabf059  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jan 2, 2025)
1c94d2f  fix conflict (XinyaoWa, Jan 9, 2025)
75f2620  align to registry (XinyaoWa, Jan 9, 2025)
fbdc51b  fix streaming (XinyaoWa, Jan 9, 2025)
9d869c9  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jan 9, 2025)
d95e8f3  fix bug (XinyaoWa, Jan 10, 2025)
cd6d2b2  rename (XinyaoWa, Jan 10, 2025)
c04cad3  Merge branch 'main' into refactor_docsum (XinyaoWa, Jan 10, 2025)
21a3a52  Merge remote-tracking branch 'upstream/refactor_docsum' into refactor… (XinyaoWa, Jan 10, 2025)
6280eba  Merge branch 'main' into refactor_docsum (XinyaoWa, Jan 10, 2025)
fb914f0  fix bug (XinyaoWa, Jan 10, 2025)
6b829f5  fix port bug (XinyaoWa, Jan 10, 2025)
c30df65  algin docsum endpoint (XinyaoWa, Jan 10, 2025)
be99d46  rm vllm ut, too slow (XinyaoWa, Jan 11, 2025)
8fa8404  refine code (XinyaoWa, Jan 11, 2025)
e2f9d99  Merge branch 'main' into refactor_docsum (XinyaoWa, Jan 11, 2025)
7e13ca6  Merge remote-tracking branch 'upstream/refactor_docsum' into refactor… (XinyaoWa, Jan 11, 2025)
18e2ee0  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jan 11, 2025)
e548779  rename (XinyaoWa, Jan 13, 2025)
f407abb  fix link (XinyaoWa, Jan 13, 2025)
6e029f2  Merge remote-tracking branch 'upstream/refactor_docsum' into refactor… (XinyaoWa, Jan 13, 2025)
93f041a  fix conflict (XinyaoWa, Jan 13, 2025)
cf663ee  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jan 13, 2025)
ab9d4da  Merge remote-tracking branch 'upstream/refactor_docsum' into refactor… (XinyaoWa, Jan 13, 2025)
33f64f2  rename for ut (XinyaoWa, Jan 13, 2025)
b0e755a  Merge branch 'main' into refactor_docsum (XinyaoWa, Jan 13, 2025)
fe7f360  fix bug (XinyaoWa, Jan 13, 2025)
8 changes: 2 additions & 6 deletions .github/workflows/docker/compose/llms-compose.yaml
@@ -11,9 +11,9 @@ services:
build:
dockerfile: comps/llms/text-generation/ollama/langchain/Dockerfile
image: ${REGISTRY:-opea}/llm-ollama:${TAG:-latest}
llm-docsum-tgi:
llm-docsum:
build:
dockerfile: comps/llms/summarization/tgi/langchain/Dockerfile
dockerfile: comps/llms/src/doc-summarization/Dockerfile
image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}
llm-faqgen:
build:
@@ -50,7 +50,3 @@ services:
build:
dockerfile: comps/llms/text-generation/predictionguard/Dockerfile
image: ${REGISTRY:-opea}/llm-textgen-predictionguard:${TAG:-latest}
llm-docsum-vllm:
build:
dockerfile: comps/llms/summarization/vllm/langchain/Dockerfile
image: ${REGISTRY:-opea}/llm-docsum-vllm:${TAG:-latest}
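For reference, the CI compose entry above only maps the service name to its new Dockerfile path; a local build equivalent to what the workflow produces is sketched below. The image tag and repo-root build context are assumptions (the workflow fills them from ${REGISTRY} and ${TAG}), not something this diff pins down.

# sketch: build the refactored DocSum LLM image from the GenAIComps repo root
# tag is illustrative; CI derives it from ${REGISTRY:-opea} and ${TAG:-latest}
docker build \
  -t opea/llm-docsum:latest \
  -f comps/llms/src/doc-summarization/Dockerfile .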
52 changes: 52 additions & 0 deletions comps/llms/deployment/docker_compose/doc-summarization_tgi.yaml
@@ -0,0 +1,52 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "3.8"

services:
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-server
ports:
- ${LLM_ENDPOINT_PORT:-8008}:80
volumes:
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
host_ip: ${host_ip}
LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
healthcheck:
test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
interval: 10s
timeout: 10s
retries: 100
command: --model-id ${LLM_MODEL_ID} --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS}
llm:
image: opea/llm-docsum:latest
container_name: llm-docsum-server
depends_on:
tgi-service:
condition: service_healthy
ports:
- ${DOCSUM_PORT:-9000}:9000
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
LLM_ENDPOINT: ${LLM_ENDPOINT}
LLM_MODEL_ID: ${LLM_MODEL_ID}
DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME}
LOGFLAG: ${LOGFLAG:-False}
restart: unless-stopped

networks:
default:
driver: bridge
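As a usage sketch for the file above (values are illustrative, not part of this PR): export the variables the compose file references, then bring the stack up. DocSum_COMPONENT_NAME and the model ID shown here are assumptions; check the doc-summarization component README for the supported values. The Gaudi TGI variant below follows the same pattern.

# illustrative environment; adjust the model and HF token to your setup
export host_ip=$(hostname -I | awk '{print $1}')
export HUGGINGFACEHUB_API_TOKEN=your_hf_token
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3         # example model, assumption
export MAX_INPUT_TOKENS=1024
export MAX_TOTAL_TOKENS=2048
export LLM_ENDPOINT_PORT=8008
export DOCSUM_PORT=9000
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export DocSum_COMPONENT_NAME=OpeaDocSumTgi             # assumed value, verify in the README
docker compose -f doc-summarization_tgi.yaml up -d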
@@ -0,0 +1,63 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "3.8"

services:
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.3.1
container_name: tgi_gaudi_server
ports:
- ${LLM_ENDPOINT_PORT:-8008}:80
volumes:
- "./data:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
host_ip: ${host_ip}
LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
runtime: habana
cap_add:
- SYS_NICE
ipc: host
healthcheck:
test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
interval: 10s
timeout: 10s
retries: 100
command: --model-id ${LLM_MODEL_ID} --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS}
llm:
image: opea/llm-docsum:latest
container_name: llm-docsum-server
depends_on:
tgi-service:
condition: service_healthy
ports:
- ${DOCSUM_PORT:-9000}:9000
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
LLM_ENDPOINT: ${LLM_ENDPOINT}
LLM_MODEL_ID: ${LLM_MODEL_ID}
DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME}
LOGFLAG: ${LOGFLAG:-False}
restart: unless-stopped

networks:
default:
driver: bridge
@@ -0,0 +1,55 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "3.8"

services:
vllm-service:
image: opea/vllm:latest
container_name: vllm-server
ports:
- ${LLM_ENDPOINT_PORT:-8008}:80
volumes:
- "./data:/data"
shm_size: 128g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LLM_MODEL_ID: ${LLM_MODEL_ID}
VLLM_TORCH_PROFILER_DIR: "/mnt"
host_ip: ${host_ip}
LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
VLLM_SKIP_WARMUP: ${VLLM_SKIP_WARMUP:-false}
healthcheck:
test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
interval: 10s
timeout: 10s
retries: 100
command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80
llm:
image: opea/llm-docsum:latest
container_name: llm-docsum-server
depends_on:
vllm-service:
condition: service_healthy
ports:
- ${DOCSUM_PORT:-9000}:9000
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
LLM_ENDPOINT: ${LLM_ENDPOINT}
LLM_MODEL_ID: ${LLM_MODEL_ID}
DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME}
LOGFLAG: ${LOGFLAG:-False}
restart: unless-stopped

networks:
default:
driver: bridge
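Once the llm-docsum-server container reports healthy, requests go to it on ${DOCSUM_PORT}. The route and payload below are a sketch based on the component's conventional /v1/docsum API; this diff does not define them, so verify against the doc-summarization README.

# assumed route and request fields, shown for illustration only
curl -X POST http://${host_ip}:9000/v1/docsum \
  -H "Content-Type: application/json" \
  -d '{"messages": "Text Embeddings Inference (TEI) is a toolkit for serving open-source text embedding models.", "max_tokens": 32, "language": "en", "stream": false}'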
@@ -8,37 +8,52 @@ services:
image: opea/vllm-gaudi:latest
container_name: vllm-gaudi-server
ports:
- "8008:80"
- ${LLM_ENDPOINT_PORT:-8008}:80
volumes:
- "./data:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
LLM_MODEL_ID: ${LLM_MODEL_ID}
MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
VLLM_TORCH_PROFILER_DIR: "/mnt"
host_ip: ${host_ip}
LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
VLLM_SKIP_WARMUP: ${VLLM_SKIP_WARMUP:-false}
runtime: habana
cap_add:
- SYS_NICE
ipc: host
command: --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80
healthcheck:
test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
interval: 10s
timeout: 10s
retries: 100
command: --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq-len-to-capture ${MAX_INPUT_TOKENS}
llm:
image: opea/llm-docsum-vllm:latest
container_name: llm-docsum-vllm-server
image: opea/llm-docsum:latest
container_name: llm-docsum-server
depends_on:
vllm-service:
condition: service_healthy
ports:
- "9000:9000"
- ${DOCSUM_PORT:-9000}:9000
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
vLLM_ENDPOINT: ${vLLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
LLM_MODEL_ID: ${LLM_MODEL_ID}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
LLM_ENDPOINT: ${LLM_ENDPOINT}
LLM_MODEL_ID: ${LLM_MODEL_ID}
DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME}
LOGFLAG: ${LOGFLAG:-False}
restart: unless-stopped

networks:
@@ -19,10 +19,10 @@ COPY comps /home/user/comps

RUN pip install --no-cache-dir --upgrade pip setuptools && \
if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \
pip install --no-cache-dir -r /home/user/comps/llms/summarization/tgi/langchain/requirements.txt
pip install --no-cache-dir -r /home/user/comps/llms/src/doc-summarization/requirements.txt

ENV PYTHONPATH=$PYTHONPATH:/home/user

WORKDIR /home/user/comps/llms/summarization/tgi/langchain
WORKDIR /home/user/comps/llms/src/doc-summarization

ENTRYPOINT ["bash", "entrypoint.sh"]
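Because the entrypoint is baked into the image, it can also be run standalone against an already-running TGI or vLLM endpoint; the sketch below assumes the same environment variables used in the compose files above and is not part of this diff.

# sketch: run the DocSum microservice against an existing LLM serving endpoint
docker run -d --name llm-docsum-server -p 9000:9000 --ipc=host \
  -e LLM_ENDPOINT="http://${host_ip}:8008" \
  -e LLM_MODEL_ID=${LLM_MODEL_ID} \
  -e MAX_INPUT_TOKENS=${MAX_INPUT_TOKENS} \
  -e MAX_TOTAL_TOKENS=${MAX_TOTAL_TOKENS} \
  -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \
  -e DocSum_COMPONENT_NAME=${DocSum_COMPONENT_NAME} \
  opea/llm-docsum:latest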