update tgi with text-generation-inference:2.1.0 (#273)
Signed-off-by: chensuyue <suyue.chen@intel.com>
chensuyue authored Jul 9, 2024
1 parent 61795fd commit f236949
Showing 5 changed files with 25 additions and 27 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/microservice-test.yml
@@ -49,14 +49,14 @@ jobs:
           cd tests
           service=$(echo $service_path | tr '/' '_')
           echo "service=${service}" >> $GITHUB_ENV
-          if [ -f test_${service}.sh ]; then timeout 10m bash test_${service}.sh; else echo "Test script not found, skip test!"; fi
+          if [ -f test_${service}.sh ]; then timeout 30m bash test_${service}.sh; else echo "Test script not found, skip test!"; fi
       - name: Clean up container
         if: cancelled() || failure()
         run: |
           cid=$(docker ps -aq --filter "name=test-comps-*")
           if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
-          echo y | docker system prune
+          echo y | docker system prune --all
       - name: Publish pipeline artifact
         if: ${{ !cancelled() }}
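The cleanup tweak above trades cache for disk space: `docker system prune` removes stopped containers, dangling images, unused networks, and build cache, while `--all` additionally deletes every image not referenced by a container. A sketch of the difference on a runner (not part of this commit):

```bash
# Show how much space images, containers, and build cache occupy.
docker system df

# Without --all: tagged images (e.g. pulled TGI releases) stay cached.
echo y | docker system prune

# With --all: tagged-but-unused images are removed too, so the stale
# text-generation-inference:1.4 image no longer lingers on the runner.
echo y | docker system prune --all
```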
18 changes: 9 additions & 9 deletions comps/llms/text-generation/tgi/README.md
@@ -19,7 +19,7 @@ export HF_TOKEN=${your_hf_api_token}
 export LANGCHAIN_TRACING_V2=true
 export LANGCHAIN_API_KEY=${your_langchain_api_key}
 export LANGCHAIN_PROJECT="opea/gen-ai-comps:llms"
-docker run -p 8008:80 -v ./data:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:1.4 --model-id ${your_hf_llm_model}
+docker run -p 8008:80 -v ./data:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${your_hf_llm_model}
 ```

 ## 1.3 Verify the TGI Service
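A minimal way to verify the endpoint started above, assuming the `8008:80` mapping from the `docker run` command (a sketch; TGI exposes a `POST /generate` route):

```bash
# Ask the freshly started container for a short completion.
curl http://localhost:8008/generate \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"inputs": "What is deep learning?", "parameters": {"max_new_tokens": 32}}'
```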
@@ -114,11 +114,11 @@ curl http://${your_ip}:9000/v1/chat/completions \

 ## 4. Validated Model

-| Model                     | TGI-Gaudi |
-| ------------------------- | --------- |
-| Intel/neural-chat-7b-v3-3 | ✓         |
-| Llama-2-7b-chat-hf        | ✓         |
-| Llama-2-70b-chat-hf       | ✓         |
-| Meta-Llama-3-8B-Instruct  | ✓         |
-| Meta-Llama-3-70B-Instruct | ✓         |
-| Phi-3                     | x         |
+| Model                     | TGI |
+| ------------------------- | --- |
+| Intel/neural-chat-7b-v3-3 | ✓   |
+| Llama-2-7b-chat-hf        | ✓   |
+| Llama-2-70b-chat-hf       | ✓   |
+| Meta-Llama-3-8B-Instruct  | ✓   |
+| Meta-Llama-3-70B-Instruct | ✓   |
+| Phi-3                     | ✓   |
9 changes: 0 additions & 9 deletions comps/llms/text-generation/tgi/build_docker.sh

This file was deleted.
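With the service now pinned to the official image published on ghcr.io, a local build step is unnecessary; the image is simply pulled:

```bash
# Fetch the prebuilt TGI 2.1.0 image instead of building it locally.
docker pull ghcr.io/huggingface/text-generation-inference:2.1.0
```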

2 changes: 1 addition & 1 deletion comps/llms/text-generation/tgi/docker_compose_llm.yaml
@@ -5,7 +5,7 @@ version: "3.8"

 services:
   tgi_service:
-    image: ghcr.io/huggingface/text-generation-inference:1.4
+    image: ghcr.io/huggingface/text-generation-inference:2.1.0
     container_name: tgi-service
     ports:
       - "8008:80"
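Bringing the updated service up through compose might look as follows; any variables the rest of the file references (model id, HF token, proxies) are assumed to be exported beforehand:

```bash
# Hypothetical launch of the tgi_service defined above on the 2.1.0 image.
export your_hf_llm_model="Intel/neural-chat-7b-v3-3"  # placeholder model id
docker compose -f comps/llms/text-generation/tgi/docker_compose_llm.yaml up -d tgi_service
```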
19 changes: 13 additions & 6 deletions tests/test_llms_text-generation_tgi.sh
@@ -14,10 +14,10 @@ function build_docker_images() {

 function start_service() {
     tgi_endpoint_port=5004
-    export your_hf_llm_model="Intel/neural-chat-7b-v3-3"
+    export your_hf_llm_model=$1
     # Remember to set HF_TOKEN before invoking this test!
     export HF_TOKEN=${HF_TOKEN}
-    docker run -d --name="test-comps-llm-tgi-endpoint" -e https_proxy -e http_proxy -p $tgi_endpoint_port:80 -v ./data:/data --shm-size 1g ghcr.io/huggingface/text-generation-inference:1.4 --model-id ${your_hf_llm_model}
+    docker run -d --name="test-comps-llm-tgi-endpoint" -p $tgi_endpoint_port:80 -v ./data:/data --shm-size 1g -e HF_TOKEN=${HF_TOKEN} ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${your_hf_llm_model} --max-input-tokens 1024 --max-total-tokens 2048
     export TGI_LLM_ENDPOINT="http://${ip_address}:${tgi_endpoint_port}"

     tei_service_port=5005
@@ -55,13 +55,20 @@ function stop_docker() {
 function main() {

     stop_docker

     build_docker_images
-    start_service
-
-    validate_microservice
+    llm_models=(
+    Intel/neural-chat-7b-v3-3
+    meta-llama/Llama-2-7b-chat-hf
+    meta-llama/Meta-Llama-3-8B-Instruct
+    microsoft/Phi-3-mini-4k-instruct
+    )
+    for model in "${llm_models[@]}"; do
+        start_service "${model}"
+        validate_microservice
+        stop_docker
+    done

-    stop_docker
     echo y | docker system prune

 }
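One caveat the new loop inherits from the single-model version: each TGI container needs time to download and load its model before `validate_microservice` can succeed. A readiness wait along these lines (hypothetical, not part of this commit) is a common companion to `start_service`:

```bash
# Poll TGI's /health route until the model has finished loading;
# it returns HTTP 200 only once the server is ready to generate.
until curl -sf "http://${ip_address}:5004/health" > /dev/null; do
    echo "Waiting for TGI to become ready..."
    sleep 10s
done
```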
