diff --git a/AgentQnA/docker_compose/amd/gpu/rocm/README.md b/AgentQnA/docker_compose/amd/gpu/rocm/README.md
index fe5253ed07..5a74979c89 100644
--- a/AgentQnA/docker_compose/amd/gpu/rocm/README.md
+++ b/AgentQnA/docker_compose/amd/gpu/rocm/README.md
@@ -64,7 +64,7 @@ We remind you that when using a specific version of the code, you need to use th
 - #### Optional. Pull TGI Docker Image (Do this if you want to use TGI)
 
   ```bash
-  docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+  docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
   ```
 
 - #### Build Docker Images
@@ -110,7 +110,7 @@ We remind you that when using a specific version of the code, you need to use th
 ##### TGI-based application:
 
-- ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+- ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
 - opea/agent:latest
 - redis/redis-stack:7.2.0-v9
 - ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
diff --git a/AgentQnA/docker_compose/amd/gpu/rocm/compose.yaml b/AgentQnA/docker_compose/amd/gpu/rocm/compose.yaml
index 4eab372dec..d674aaf0e6 100644
--- a/AgentQnA/docker_compose/amd/gpu/rocm/compose.yaml
+++ b/AgentQnA/docker_compose/amd/gpu/rocm/compose.yaml
@@ -2,7 +2,7 @@ services:
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:3.0.0-rocm
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
     container_name: tgi-service
     ports:
       - "${TGI_SERVICE_PORT-8085}:80"
diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml b/AudioQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml
index d421f488fd..5fa82b3d8f 100644
--- a/AudioQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml
+++ b/AudioQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml
@@ -25,7 +25,7 @@ services:
       https_proxy: ${https_proxy}
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     container_name: tgi-service
     ports:
       - ${LLM_SERVER_PORT:-3006}:80
diff --git a/AvatarChatbot/docker_compose/intel/cpu/xeon/README.md b/AvatarChatbot/docker_compose/intel/cpu/xeon/README.md
index bf686ce99e..67d9d0b456 100644
--- a/AvatarChatbot/docker_compose/intel/cpu/xeon/README.md
+++ b/AvatarChatbot/docker_compose/intel/cpu/xeon/README.md
@@ -19,7 +19,7 @@ docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build
 ### 3. Build LLM Image
 
-Intel Xeon optimized image hosted in huggingface repo will be used for TGI service: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu (https://github.com/huggingface/text-generation-inference)
+Intel Xeon optimized image hosted in huggingface repo will be used for TGI service: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu (https://github.com/huggingface/text-generation-inference)
 
 ### 4. Build TTS Image
diff --git a/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml b/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml
index f33449d020..c9748421a0 100644
--- a/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml
@@ -26,7 +26,7 @@ services:
       https_proxy: ${https_proxy}
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     container_name: tgi-service
     ports:
       - "3006:80"
diff --git a/AvatarChatbot/docker_compose/intel/hpu/gaudi/README.md b/AvatarChatbot/docker_compose/intel/hpu/gaudi/README.md
index 994d400ce4..ecd4bb5ec8 100644
--- a/AvatarChatbot/docker_compose/intel/hpu/gaudi/README.md
+++ b/AvatarChatbot/docker_compose/intel/hpu/gaudi/README.md
@@ -19,7 +19,7 @@ docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy -
 ### 3. Build LLM Image
 
-Intel Gaudi optimized image hosted in huggingface repo will be used for TGI service: ghcr.io/huggingface/tgi-gaudi:2.0.6 (https://github.com/huggingface/tgi-gaudi)
+Intel Gaudi optimized image hosted in huggingface repo will be used for TGI service: ghcr.io/huggingface/tgi-gaudi:2.3.1 (https://github.com/huggingface/tgi-gaudi)
 
 ### 4. Build TTS Image
diff --git a/AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml b/AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml
index aba9bb910c..4123034856 100644
--- a/AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -38,7 +38,7 @@ services:
       - SYS_NICE
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
     container_name: tgi-gaudi-server
     ports:
       - "3006:80"
diff --git a/AvatarChatbot/tests/test_compose_on_gaudi.sh b/AvatarChatbot/tests/test_compose_on_gaudi.sh
index c9d693c415..6bf2b80bcc 100755
--- a/AvatarChatbot/tests/test_compose_on_gaudi.sh
+++ b/AvatarChatbot/tests/test_compose_on_gaudi.sh
@@ -36,7 +36,7 @@ function build_docker_images() {
     service_list="avatarchatbot whisper-gaudi speecht5-gaudi wav2lip-gaudi animation"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.3.1
 
     docker images && sleep 1s
 }
diff --git a/AvatarChatbot/tests/test_compose_on_rocm.sh b/AvatarChatbot/tests/test_compose_on_rocm.sh
index dab4564a2d..c8c49f6df1 100644
--- a/AvatarChatbot/tests/test_compose_on_rocm.sh
+++ b/AvatarChatbot/tests/test_compose_on_rocm.sh
@@ -34,7 +34,7 @@ function build_docker_images() {
     service_list="avatarchatbot whisper asr speecht5 tts wav2lip animation"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
 
     docker images && sleep 3s
 }
diff --git a/AvatarChatbot/tests/test_compose_on_xeon.sh b/AvatarChatbot/tests/test_compose_on_xeon.sh
index b0013aa2af..e572153cbb 100755
--- a/AvatarChatbot/tests/test_compose_on_xeon.sh
+++ b/AvatarChatbot/tests/test_compose_on_xeon.sh
@@ -36,7 +36,7 @@ function build_docker_images() {
     service_list="avatarchatbot whisper speecht5 wav2lip animation"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
 
     docker images && sleep 1s
 }
diff --git a/ChatQnA/docker_compose/amd/gpu/rocm/README.md b/ChatQnA/docker_compose/amd/gpu/rocm/README.md
index 4d968b84eb..c0ec637d37 100644
--- a/ChatQnA/docker_compose/amd/gpu/rocm/README.md
+++ b/ChatQnA/docker_compose/amd/gpu/rocm/README.md
@@ -165,7 +165,7 @@ eaf24161aca8 opea/nginx:latest "/docker-
 05512bd29fee opea/dataprep:latest "sh -c 'python $( [ …" 37 seconds ago Up 36 seconds (healthy) 0.0.0.0:18103->5000/tcp, [::]:18103->5000/tcp chatqna-dataprep-service
 49844d339d1d opea/retriever:latest "python opea_retriev…" 37 seconds ago Up 36 seconds 0.0.0.0:7000->7000/tcp, [::]:7000->7000/tcp chatqna-retriever
 75b698fe7de0 ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 "text-embeddings-rou…" 37 seconds ago Up 36 seconds 0.0.0.0:18808->80/tcp, [::]:18808->80/tcp chatqna-tei-reranking-service
-342f01bfdbb2 ghcr.io/huggingface/text-generation-inference:2.3.1-rocm"python3 /workspace/…" 37 seconds ago Up 36 seconds 0.0.0.0:18008->8011/tcp, [::]:18008->8011/tcp chatqna-tgi-service
+342f01bfdbb2 ghcr.io/huggingface/text-generation-inference:2.4.1-rocm"python3 /workspace/…" 37 seconds ago Up 36 seconds 0.0.0.0:18008->8011/tcp, [::]:18008->8011/tcp chatqna-tgi-service
 6081eb1c119d redis/redis-stack:7.2.0-v9 "/entrypoint.sh" 37 seconds ago Up 36 seconds 0.0.0.0:6379->6379/tcp, [::]:6379->6379/tcp, 0.0.0.0:8001->8001/tcp, [::]:8001->8001/tcp chatqna-redis-vector-db
 eded17420782 ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 "text-embeddings-rou…" 37 seconds ago Up 36 seconds 0.0.0.0:18090->80/tcp, [::]:18090->80/tcp chatqna-tei-embedding-service
 ```
@@ -181,7 +181,7 @@ e0ef1ea67640 opea/llm-faqgen:latest "bash ent
 05512bd29fee opea/dataprep:latest "sh -c 'python $( [ …" 37 seconds ago Up 36 seconds (healthy) 0.0.0.0:18103->5000/tcp, [::]:18103->5000/tcp chatqna-dataprep-service
 49844d339d1d opea/retriever:latest "python opea_retriev…" 37 seconds ago Up 36 seconds 0.0.0.0:7000->7000/tcp, [::]:7000->7000/tcp chatqna-retriever
 75b698fe7de0 ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 "text-embeddings-rou…" 37 seconds ago Up 36 seconds 0.0.0.0:18808->80/tcp, [::]:18808->80/tcp chatqna-tei-reranking-service
-342f01bfdbb2 ghcr.io/huggingface/text-generation-inference:2.3.1-rocm"python3 /workspace/…" 37 seconds ago Up 36 seconds 0.0.0.0:18008->8011/tcp, [::]:18008->8011/tcp chatqna-tgi-service
+342f01bfdbb2 ghcr.io/huggingface/text-generation-inference:2.4.1-rocm"python3 /workspace/…" 37 seconds ago Up 36 seconds 0.0.0.0:18008->8011/tcp, [::]:18008->8011/tcp chatqna-tgi-service
 6081eb1c119d redis/redis-stack:7.2.0-v9 "/entrypoint.sh" 37 seconds ago Up 36 seconds 0.0.0.0:6379->6379/tcp, [::]:6379->6379/tcp, 0.0.0.0:8001->8001/tcp, [::]:8001->8001/tcp chatqna-redis-vector-db
 eded17420782 ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 "text-embeddings-rou…" 37 seconds ago Up 36 seconds 0.0.0.0:18090->80/tcp, [::]:18090->80/tcp chatqna-tei-embedding-service
 ```
diff --git a/ChatQnA/docker_compose/amd/gpu/rocm/compose.yaml b/ChatQnA/docker_compose/amd/gpu/rocm/compose.yaml
index 14f2eb3312..33f9770629 100644
--- a/ChatQnA/docker_compose/amd/gpu/rocm/compose.yaml
+++ b/ChatQnA/docker_compose/amd/gpu/rocm/compose.yaml
@@ -85,7 +85,7 @@ services:
     command: --model-id ${CHATQNA_RERANK_MODEL_ID} --auto-truncate
   chatqna-tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
     container_name: chatqna-tgi-service
     ports:
       - "${CHATQNA_TGI_SERVICE_PORT}:80"
diff --git a/ChatQnA/docker_compose/amd/gpu/rocm/compose_faqgen.yaml b/ChatQnA/docker_compose/amd/gpu/rocm/compose_faqgen.yaml
index df2a9a42a3..9ffd813134 100644
--- a/ChatQnA/docker_compose/amd/gpu/rocm/compose_faqgen.yaml
+++ b/ChatQnA/docker_compose/amd/gpu/rocm/compose_faqgen.yaml
@@ -85,7 +85,7 @@ services:
     command: --model-id ${CHATQNA_RERANK_MODEL_ID} --auto-truncate
   chatqna-tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
     container_name: chatqna-tgi-service
     ports:
       - "${CHATQNA_TGI_SERVICE_PORT}:80"
diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_faqgen_tgi.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_faqgen_tgi.yaml
index a66be60327..18410bf071 100644
--- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_faqgen_tgi.yaml
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_faqgen_tgi.yaml
@@ -81,7 +81,7 @@ services:
       HF_HUB_ENABLE_HF_TRANSFER: 0
     command: --model-id ${RERANK_MODEL_ID} --auto-truncate
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     container_name: tgi-server
     ports:
       - ${LLM_ENDPOINT_PORT:-9009}:80
diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml
index 434ae34eac..4a509ced70 100644
--- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml
@@ -81,7 +81,7 @@ services:
       HF_HUB_ENABLE_HF_TRANSFER: 0
     command: --model-id ${RERANK_MODEL_ID} --auto-truncate
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     container_name: tgi-service
     ports:
       - "9009:80"
diff --git a/ChatQnA/kubernetes/gmc/README.md b/ChatQnA/kubernetes/gmc/README.md
index 5775d14b88..29e69d6b21 100644
--- a/ChatQnA/kubernetes/gmc/README.md
+++ b/ChatQnA/kubernetes/gmc/README.md
@@ -18,7 +18,7 @@ The ChatQnA uses the below prebuilt images if you choose a Xeon deployment
 - tei_embedding_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.6
 - retriever: opea/retriever:latest
 - tei_xeon_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.6
-- tgi-service: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+- tgi-service: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
 - chaqna-xeon-backend-server: opea/chatqna:latest
 
 Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services.
diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml
index eec356dd8c..2e25274644 100644
--- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml
@@ -4,7 +4,7 @@ services:
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     container_name: tgi-server
     profiles:
      - codegen-xeon-tgi
diff --git a/CodeGen/tests/test_compose_on_rocm.sh b/CodeGen/tests/test_compose_on_rocm.sh
index 94f006e358..6c7031456e 100644
--- a/CodeGen/tests/test_compose_on_rocm.sh
+++ b/CodeGen/tests/test_compose_on_rocm.sh
@@ -29,7 +29,7 @@ function build_docker_images() {
     service_list="codegen codegen-ui llm-textgen"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
 
     docker images && sleep 1s
 }
diff --git a/CodeGen/tests/test_compose_on_xeon.sh b/CodeGen/tests/test_compose_on_xeon.sh
index 4aaa180ec3..9596aed5af 100644
--- a/CodeGen/tests/test_compose_on_xeon.sh
+++ b/CodeGen/tests/test_compose_on_xeon.sh
@@ -46,7 +46,7 @@ function build_docker_images() {
 
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
 
     docker images && sleep 1s
 }
diff --git a/CodeTrans/docker_compose/amd/gpu/rocm/README.md b/CodeTrans/docker_compose/amd/gpu/rocm/README.md
index 9ea891b496..d33364e2b2 100644
--- a/CodeTrans/docker_compose/amd/gpu/rocm/README.md
+++ b/CodeTrans/docker_compose/amd/gpu/rocm/README.md
@@ -150,7 +150,7 @@ eaf24161aca8 opea/nginx:latest "/docker-
 2fce48a4c0f4 opea/codetrans-ui:latest "docker-entrypoint.s…" 37 seconds ago Up 5 seconds 0.0.0.0:18101->5173/tcp, [::]:18101->5173/tcp codetrans-ui-server
 613c384979f4 opea/codetrans:latest "bash entrypoint.sh" 37 seconds ago Up 5 seconds 0.0.0.0:18102->8888/tcp, [::]:18102->8888/tcp codetrans-backend-server
 e0ef1ea67640 opea/llm-textgen:latest "bash entrypoint.sh" 37 seconds ago Up 36 seconds 0.0.0.0:18011->9000/tcp, [::]:18011->9000/tcp codetrans-llm-server
-342f01bfdbb2 ghcr.io/huggingface/text-generation-inference:2.3.1-rocm"python3 /workspace/…" 37 seconds ago Up 36 seconds 0.0.0.0:18008->8011/tcp, [::]:18008->8011/tcp codetrans-tgi-service
+342f01bfdbb2 ghcr.io/huggingface/text-generation-inference:2.4.1-rocm"python3 /workspace/…" 37 seconds ago Up 36 seconds 0.0.0.0:18008->8011/tcp, [::]:18008->8011/tcp codetrans-tgi-service
 ```
 
 if used vLLM:
diff --git a/CodeTrans/docker_compose/intel/cpu/xeon/compose_tgi.yaml b/CodeTrans/docker_compose/intel/cpu/xeon/compose_tgi.yaml
index 77c668241c..27b726f8cc 100644
--- a/CodeTrans/docker_compose/intel/cpu/xeon/compose_tgi.yaml
+++ b/CodeTrans/docker_compose/intel/cpu/xeon/compose_tgi.yaml
@@ -3,7 +3,7 @@ services:
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     container_name: codetrans-xeon-tgi-service
     ports:
       - "8008:80"
diff --git a/CodeTrans/docker_compose/intel/hpu/gaudi/compose_tgi.yaml b/CodeTrans/docker_compose/intel/hpu/gaudi/compose_tgi.yaml
index 9bcc01f318..023eed2adf 100644
--- a/CodeTrans/docker_compose/intel/hpu/gaudi/compose_tgi.yaml
+++ b/CodeTrans/docker_compose/intel/hpu/gaudi/compose_tgi.yaml
@@ -3,7 +3,7 @@ services:
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
     container_name: codetrans-gaudi-tgi-service
     ports:
       - "8008:80"
diff --git a/CodeTrans/tests/test_compose_tgi_on_gaudi.sh b/CodeTrans/tests/test_compose_tgi_on_gaudi.sh
index 051afce9d4..e1e0a4d3e6 100644
--- a/CodeTrans/tests/test_compose_tgi_on_gaudi.sh
+++ b/CodeTrans/tests/test_compose_tgi_on_gaudi.sh
@@ -29,7 +29,7 @@ function build_docker_images() {
     service_list="codetrans codetrans-ui llm-textgen nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.3.1
 
     docker images && sleep 1s
 }
diff --git a/CodeTrans/tests/test_compose_tgi_on_xeon.sh b/CodeTrans/tests/test_compose_tgi_on_xeon.sh
index 00da9bde73..cb51c13867 100644
--- a/CodeTrans/tests/test_compose_tgi_on_xeon.sh
+++ b/CodeTrans/tests/test_compose_tgi_on_xeon.sh
@@ -29,7 +29,7 @@ function build_docker_images() {
     service_list="codetrans codetrans-ui llm-textgen nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
 
     docker images && sleep 1s
 }
diff --git a/DBQnA/docker_compose/intel/cpu/xeon/compose.yaml b/DBQnA/docker_compose/intel/cpu/xeon/compose.yaml
index b96a71d01d..ebfe1f8dec 100644
--- a/DBQnA/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/DBQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -3,7 +3,7 @@ services:
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     container_name: tgi-service
     ports:
       - "8008:80"
diff --git a/DBQnA/tests/test_compose_on_xeon.sh b/DBQnA/tests/test_compose_on_xeon.sh
index c410cc48f8..c3255d484b 100755
--- a/DBQnA/tests/test_compose_on_xeon.sh
+++ b/DBQnA/tests/test_compose_on_xeon.sh
@@ -23,7 +23,7 @@ function build_docker_images() {
     echo "Build all the images with --no-cache, check docker_image_build.log for details..."
     docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
 
     docker images && sleep 1s
 }
diff --git a/DocSum/docker_compose/intel/cpu/xeon/compose_tgi.yaml b/DocSum/docker_compose/intel/cpu/xeon/compose_tgi.yaml
index 4b0362bd09..8ab5652b9e 100644
--- a/DocSum/docker_compose/intel/cpu/xeon/compose_tgi.yaml
+++ b/DocSum/docker_compose/intel/cpu/xeon/compose_tgi.yaml
@@ -3,7 +3,7 @@ services:
   tgi-server:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     container_name: docsum-xeon-tgi-server
     ports:
       - ${LLM_ENDPOINT_PORT:-8008}:80
diff --git a/DocSum/kubernetes/gmc/README.md b/DocSum/kubernetes/gmc/README.md
index aaab01a8c8..e6175f1587 100644
--- a/DocSum/kubernetes/gmc/README.md
+++ b/DocSum/kubernetes/gmc/README.md
@@ -8,7 +8,7 @@ Install GMC in your Kubernetes cluster, if you have not already done so, by foll
 The DocSum application is defined as a Custom Resource (CR) file that the above GMC operator acts upon.
 It first checks if the microservices listed in the CR yaml file are running, if not it starts them and then proceeds to connect them. When the DocSum RAG pipeline is ready, the service endpoint details are returned, letting you use the application. Should you use "kubectl get pods" commands you will see all the component microservices, in particular embedding, retriever, rerank, and llm.
 The DocSum pipeline uses prebuilt images. The Xeon version uses the prebuilt image `llm-docsum-tgi:latest` which internally leverages the
-the image `ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu`. The service is called tgi-svc. Meanwhile, the Gaudi version launches the
+the image `ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu`. The service is called tgi-svc. Meanwhile, the Gaudi version launches the
 service tgi-gaudi-svc, which uses the image `ghcr.io/huggingface/tgi-gaudi:2.3.1`. Both TGI model services serve the model specified in the LLM_MODEL_ID variable that is exported by you. In the below example we use `Intel/neural-chat-7b-v3-3`.
 
 [NOTE]
diff --git a/DocSum/tests/test_compose_on_rocm.sh b/DocSum/tests/test_compose_on_rocm.sh
index ee95ffc0be..1ce4f64734 100644
--- a/DocSum/tests/test_compose_on_rocm.sh
+++ b/DocSum/tests/test_compose_on_rocm.sh
@@ -30,7 +30,7 @@ function build_docker_images() {
     service_list="docsum docsum-gradio-ui whisper llm-docsum"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
 
     docker images && sleep 3s
 }
diff --git a/DocSum/tests/test_compose_tgi_on_xeon.sh b/DocSum/tests/test_compose_tgi_on_xeon.sh
index 4ac895d7a0..a9d83fc54b 100644
--- a/DocSum/tests/test_compose_tgi_on_xeon.sh
+++ b/DocSum/tests/test_compose_tgi_on_xeon.sh
@@ -39,7 +39,7 @@ function build_docker_images() {
     service_list="docsum docsum-gradio-ui whisper llm-docsum"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/text-generation-inference:1.4
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1
 
     docker images && sleep 1s
 }
diff --git a/DocSum/tests/test_compose_vllm_on_rocm.sh b/DocSum/tests/test_compose_vllm_on_rocm.sh
index 2eb360f178..744280d4e0 100644
--- a/DocSum/tests/test_compose_vllm_on_rocm.sh
+++ b/DocSum/tests/test_compose_vllm_on_rocm.sh
@@ -30,7 +30,7 @@ function build_docker_images() {
     service_list="docsum docsum-gradio-ui whisper llm-docsum vllm-rocm"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
 
     docker images && sleep 3s
 }
diff --git a/MultimodalQnA/docker_compose/amd/gpu/rocm/README.md b/MultimodalQnA/docker_compose/amd/gpu/rocm/README.md
index 14e66d989a..8489ab366e 100644
--- a/MultimodalQnA/docker_compose/amd/gpu/rocm/README.md
+++ b/MultimodalQnA/docker_compose/amd/gpu/rocm/README.md
@@ -77,7 +77,7 @@ After launching your instance, you can connect to it using SSH (for Linux instan
 - #### Optional. Pull TGI Docker Image (Do this if you want to use TGI)
 
   ```bash
-  docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+  docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
   ```
 
 - #### Build Docker Images
diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
index 00a16c1670..99d8c7b551 100644
--- a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
@@ -100,7 +100,7 @@ services:
       timeout: 10s
       retries: 60
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     container_name: tgi-service
     ports:
       - "9009:80"
@@ -156,7 +156,7 @@ services:
     ipc: host
     restart: always
   tgi_service_codegen:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     container_name: tgi_service_codegen
     ports:
       - "8028:80"
diff --git a/ProductivitySuite/tests/test_compose_on_xeon.sh b/ProductivitySuite/tests/test_compose_on_xeon.sh
index 57f903ed95..cd2feb3dc5 100755
--- a/ProductivitySuite/tests/test_compose_on_xeon.sh
+++ b/ProductivitySuite/tests/test_compose_on_xeon.sh
@@ -23,7 +23,7 @@ function build_docker_images() {
     docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log
 
     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.6
-    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
 
     docker images && sleep 1s
 }
diff --git a/SearchQnA/docker_compose/intel/cpu/xeon/compose.yaml b/SearchQnA/docker_compose/intel/cpu/xeon/compose.yaml
index 4503a645bb..9b692a3d95 100644
--- a/SearchQnA/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/SearchQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -91,7 +91,7 @@ services:
       LOGFLAG: ${LOGFLAG}
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     container_name: tgi-service
     ports:
       - "3006:80"
diff --git a/SearchQnA/tests/test_compose_on_xeon.sh b/SearchQnA/tests/test_compose_on_xeon.sh
index fb5cfaa469..89e4b8e5f9 100644
--- a/SearchQnA/tests/test_compose_on_xeon.sh
+++ b/SearchQnA/tests/test_compose_on_xeon.sh
@@ -36,7 +36,7 @@ function build_docker_images() {
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.6
-    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
 
     docker images && sleep 1s
 }
diff --git a/Translation/docker_compose/intel/cpu/xeon/README.md b/Translation/docker_compose/intel/cpu/xeon/README.md
index 095ca54c38..df5d3214a0 100644
--- a/Translation/docker_compose/intel/cpu/xeon/README.md
+++ b/Translation/docker_compose/intel/cpu/xeon/README.md
@@ -85,7 +85,7 @@ CONTAINER ID IMAGE C
 68b8b86a737e opea/translation-ui:latest "docker-entrypoint.s…" 7 minutes ago Up About a minute 0.0.0.0:5173->5173/tcp, :::5173->5173/tcp translation-xeon-ui-server
 8400903275b5 opea/translation:latest "python translation.…" 7 minutes ago Up About a minute 0.0.0.0:8888->8888/tcp, :::8888->8888/tcp translation-xeon-backend-server
 2da5545cb18c opea/llm-textgen:latest "bash entrypoint.sh" 7 minutes ago Up About a minute 0.0.0.0:9000->9000/tcp, :::9000->9000/tcp llm-textgen-server
-dee02c1fb538 ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu "text-generation-lau…" 7 minutes ago Up 7 minutes (healthy) 0.0.0.0:8008->80/tcp, [::]:8008->80/tcp tgi-service
+dee02c1fb538 ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu "text-generation-lau…" 7 minutes ago Up 7 minutes (healthy) 0.0.0.0:8008->80/tcp, [::]:8008->80/tcp tgi-service
 ```
 
 ### Test the Pipeline
@@ -125,7 +125,7 @@ The compose.yaml is default compose file using tgi as serving framework
 | Service Name                    | Image Name                                                     |
 | ------------------------------- | -------------------------------------------------------------- |
-| tgi-service                     | ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu  |
+| tgi-service                     | ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu  |
 | llm                             | opea/llm-textgen:latest                                        |
 | translation-xeon-backend-server | opea/translation:latest                                        |
 | translation-xeon-ui-server      | opea/translation-ui:latest                                     |
@@ -137,7 +137,7 @@ The table provides a comprehensive overview of the Translation service utilized
 | Service Name                    | Possible Image Names                                           | Optional | Description                                                                                       |
 | ------------------------------- | -------------------------------------------------------------- | -------- | ------------------------------------------------------------------------------------------------- |
-| tgi-service                     | ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu  | No       | Specific to the TGI deployment, focuses on text generation inference using Xeon hardware.         |
+| tgi-service                     | ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu  | No       | Specific to the TGI deployment, focuses on text generation inference using Xeon hardware.         |
 | llm                             | opea/llm-textgen:latest                                        | No       | Handles large language model (LLM) tasks                                                          |
 | translation-xeon-backend-server | opea/translation:latest                                        | No       | Serves as the backend for the Translation service, with variations depending on the deployment.   |
 | translation-xeon-ui-server      | opea/translation-ui:latest                                     | No       | Provides the user interface for the Translation service.                                          |
diff --git a/Translation/docker_compose/intel/cpu/xeon/compose.yaml b/Translation/docker_compose/intel/cpu/xeon/compose.yaml
index 4b77d84484..aeb94f8fdd 100644
--- a/Translation/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/Translation/docker_compose/intel/cpu/xeon/compose.yaml
@@ -3,7 +3,7 @@ services:
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     container_name: tgi-service
     ports:
       - "8008:80"
diff --git a/Translation/tests/test_compose_on_xeon.sh b/Translation/tests/test_compose_on_xeon.sh
index 7eeec8c7a0..8195ea5b3a 100644
--- a/Translation/tests/test_compose_on_xeon.sh
+++ b/Translation/tests/test_compose_on_xeon.sh
@@ -35,7 +35,7 @@ function build_docker_images() {
     service_list="translation translation-ui llm-textgen nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
 
     docker images && sleep 1s
 }
diff --git a/VisualQnA/docker_compose/amd/gpu/rocm/README.md b/VisualQnA/docker_compose/amd/gpu/rocm/README.md
index 1647b16b2a..9a582c9dbf 100644
--- a/VisualQnA/docker_compose/amd/gpu/rocm/README.md
+++ b/VisualQnA/docker_compose/amd/gpu/rocm/README.md
@@ -71,7 +71,7 @@
 - #### Optional. Pull TGI Docker Image (Do this if you want to use TGI)
 
   ```bash
-  docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+  docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
   ```
 
 - #### Build Docker Images
diff --git a/VisualQnA/docker_compose/intel/cpu/xeon/README.md b/VisualQnA/docker_compose/intel/cpu/xeon/README.md
index cfbc3ab1c1..35524d99ed 100644
--- a/VisualQnA/docker_compose/intel/cpu/xeon/README.md
+++ b/VisualQnA/docker_compose/intel/cpu/xeon/README.md
@@ -48,13 +48,13 @@ docker build --no-cache -t opea/visualqna-ui:latest --build-arg https_proxy=$htt
 # vLLM
 docker pull opea/vllm:latest
 # TGI (Optional)
-docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
 ```
 
 Then run the command `docker images`, you will have the following Docker Images:
 
 1. `opea/vllm:latest`
-2. `ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu` (Optional)
+2. `ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu` (Optional)
 3. `opea/lvm:latest`
 4. `opea/visualqna:latest`
 5. `opea/visualqna-ui:latest`
diff --git a/VisualQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml b/VisualQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml
index b595bdcba7..5bacf1108d 100644
--- a/VisualQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml
+++ b/VisualQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml
@@ -3,7 +3,7 @@ services:
   llava-tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     container_name: tgi-llava-xeon-server
     ports:
       - "8399:80"
diff --git a/VisualQnA/tests/test_compose_tgi_on_xeon.sh b/VisualQnA/tests/test_compose_tgi_on_xeon.sh
index 29a009904d..8ef6aadb6a 100644
--- a/VisualQnA/tests/test_compose_tgi_on_xeon.sh
+++ b/VisualQnA/tests/test_compose_tgi_on_xeon.sh
@@ -28,6 +28,8 @@ function build_docker_images() {
     service_list="visualqna visualqna-ui lvm nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
+
     docker images && sleep 1s
 }
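
The hunks above converge on three TGI image tags: `text-generation-inference:2.4.1-intel-cpu` for Xeon, `text-generation-inference:2.4.1-rocm` for ROCm, and `tgi-gaudi:2.3.1` for Gaudi. A quick way to confirm that no stale reference survives after applying the patch is a repository-wide grep; the snippet below is a minimal sketch of such a check, not part of the patch, and the script name, checkout path, and tag list are assumptions.

```bash
#!/usr/bin/env bash
# check_tgi_tags.sh (hypothetical helper): scan a GenAIExamples checkout for TGI
# image tags that the version-bump patch above is meant to retire.
set -euo pipefail

repo_root="${1:-.}"   # path to the checkout; defaults to the current directory (assumption)

# Tags removed by the patch; extend this list if further bumps land later.
stale_tags=(
  "text-generation-inference:2.4.0-intel-cpu"
  "text-generation-inference:2.3.1-rocm"
  "text-generation-inference:3.0.0-rocm"
  "text-generation-inference:1.4"
  "tgi-gaudi:2.0.6"
)

status=0
for tag in "${stale_tags[@]}"; do
  # -r recurse, -n line numbers, -I skip binaries, -F fixed-string match;
  # any hit means a compose file, test script, or README still uses an old tag.
  if grep -rnIF --include='*.yaml' --include='*.sh' --include='*.md' "$tag" "$repo_root"; then
    echo "Stale TGI tag still referenced: $tag" >&2
    status=1
  fi
done
exit $status
```

Run it from the root of the checkout after applying the patch; a non-zero exit code points at a file the version bump missed.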