diff --git a/AudioQnA/docker/xeon/compose.yaml b/AudioQnA/docker/xeon/compose.yaml
index f640ecae3..a0ef81d17 100644
--- a/AudioQnA/docker/xeon/compose.yaml
+++ b/AudioQnA/docker/xeon/compose.yaml
@@ -41,7 +41,7 @@ services:
     environment:
       TTS_ENDPOINT: ${TTS_ENDPOINT}
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:latest-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
     container_name: tgi-service
     ports:
       - "3006:80"
diff --git a/ChatQnA/docker/xeon/compose.yaml b/ChatQnA/docker/xeon/compose.yaml
index 3828aa57f..4697300a7 100644
--- a/ChatQnA/docker/xeon/compose.yaml
+++ b/ChatQnA/docker/xeon/compose.yaml
@@ -102,7 +102,7 @@ services:
       HF_HUB_ENABLE_HF_TRANSFER: 0
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:latest-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
     container_name: tgi-service
     ports:
       - "9009:80"
diff --git a/ChatQnA/docker/xeon/compose_qdrant.yaml b/ChatQnA/docker/xeon/compose_qdrant.yaml
index a149d9426..922f74dcf 100644
--- a/ChatQnA/docker/xeon/compose_qdrant.yaml
+++ b/ChatQnA/docker/xeon/compose_qdrant.yaml
@@ -102,7 +102,7 @@ services:
       HF_HUB_ENABLE_HF_TRANSFER: 0
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:latest-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
     container_name: tgi-service
     ports:
       - "6042:80"
diff --git a/ChatQnA/kubernetes/README.md b/ChatQnA/kubernetes/README.md
index 55be03943..c666e9fae 100644
--- a/ChatQnA/kubernetes/README.md
+++ b/ChatQnA/kubernetes/README.md
@@ -20,7 +20,7 @@ The ChatQnA uses the below prebuilt images if you choose a Xeon deployment
 - retriever: opea/retriever-redis:latest
 - tei_xeon_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
 - reranking: opea/reranking-tei:latest
-- tgi-service: ghcr.io/huggingface/text-generation-inference:latest-intel-cpu
+- tgi-service: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
 - llm: opea/llm-tgi:latest
 - chaqna-xeon-backend-server: opea/chatqna:latest

diff --git a/ChatQnA/kubernetes/manifests/xeon/chatqna.yaml b/ChatQnA/kubernetes/manifests/xeon/chatqna.yaml
index 022ddfa4c..cf5070d94 100644
--- a/ChatQnA/kubernetes/manifests/xeon/chatqna.yaml
+++ b/ChatQnA/kubernetes/manifests/xeon/chatqna.yaml
@@ -1121,7 +1121,7 @@ spec:
                 name: chatqna-tgi-config
           securityContext:
             {}
-          image: "ghcr.io/huggingface/text-generation-inference:latest-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/CodeGen/docker/xeon/compose.yaml b/CodeGen/docker/xeon/compose.yaml
index ba7bcdabf..ab1e4150c 100644
--- a/CodeGen/docker/xeon/compose.yaml
+++ b/CodeGen/docker/xeon/compose.yaml
@@ -3,7 +3,7 @@

 services:
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:latest-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
     container_name: tgi-service
     ports:
       - "8028:80"
diff --git a/CodeGen/kubernetes/manifests/xeon/codegen.yaml b/CodeGen/kubernetes/manifests/xeon/codegen.yaml
index 9b5729dd0..55fbc6ffb 100644
--- a/CodeGen/kubernetes/manifests/xeon/codegen.yaml
+++ b/CodeGen/kubernetes/manifests/xeon/codegen.yaml
@@ -239,7 +239,7 @@ spec:
                 name: codegen-tgi-config
           securityContext:
             {}
-          image: "ghcr.io/huggingface/text-generation-inference:latest-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
"ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu" imagePullPolicy: IfNotPresent volumeMounts: - mountPath: /data diff --git a/CodeGen/kubernetes/manifests/xeon/ui/react-codegen.yaml b/CodeGen/kubernetes/manifests/xeon/ui/react-codegen.yaml index ac21f6835..5d77fb8cc 100644 --- a/CodeGen/kubernetes/manifests/xeon/ui/react-codegen.yaml +++ b/CodeGen/kubernetes/manifests/xeon/ui/react-codegen.yaml @@ -126,7 +126,7 @@ spec: - name: no_proxy value: securityContext: {} - image: "ghcr.io/huggingface/text-generation-inference:latest-intel-cpu" + image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu" imagePullPolicy: IfNotPresent volumeMounts: - mountPath: /data diff --git a/CodeGen/tests/test_codegen_on_xeon.sh b/CodeGen/tests/test_codegen_on_xeon.sh index 6e759dc4e..19259d6a6 100644 --- a/CodeGen/tests/test_codegen_on_xeon.sh +++ b/CodeGen/tests/test_codegen_on_xeon.sh @@ -22,7 +22,7 @@ function build_docker_images() { service_list="codegen codegen-ui llm-tgi" docker compose -f docker_build_compose.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/text-generation-inference:latest-intel-cpu + docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu docker images } diff --git a/CodeTrans/docker/xeon/compose.yaml b/CodeTrans/docker/xeon/compose.yaml index c4666621c..e62989122 100644 --- a/CodeTrans/docker/xeon/compose.yaml +++ b/CodeTrans/docker/xeon/compose.yaml @@ -3,7 +3,7 @@ services: tgi-service: - image: ghcr.io/huggingface/text-generation-inference:latest-intel-cpu + image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu container_name: codetrans-tgi-service ports: - "8008:80" diff --git a/CodeTrans/kubernetes/manifests/xeon/codetrans.yaml b/CodeTrans/kubernetes/manifests/xeon/codetrans.yaml index 76d6fbfcb..e3b66e6b1 100644 --- a/CodeTrans/kubernetes/manifests/xeon/codetrans.yaml +++ b/CodeTrans/kubernetes/manifests/xeon/codetrans.yaml @@ -239,7 +239,7 @@ spec: name: codetrans-tgi-config securityContext: {} - image: "ghcr.io/huggingface/text-generation-inference:latest-intel-cpu" + image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu" imagePullPolicy: IfNotPresent volumeMounts: - mountPath: /data diff --git a/DocSum/docker/xeon/compose.yaml b/DocSum/docker/xeon/compose.yaml index ffb2ba7f5..620ee3657 100644 --- a/DocSum/docker/xeon/compose.yaml +++ b/DocSum/docker/xeon/compose.yaml @@ -3,7 +3,7 @@ services: tgi-service: - image: ghcr.io/huggingface/text-generation-inference:latest-intel-cpu + image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu container_name: tgi-service ports: - "8008:80" diff --git a/DocSum/kubernetes/README.md b/DocSum/kubernetes/README.md index 6627f8cb7..c91f0fccb 100644 --- a/DocSum/kubernetes/README.md +++ b/DocSum/kubernetes/README.md @@ -8,7 +8,7 @@ Install GMC in your Kubernetes cluster, if you have not already done so, by foll The DocSum application is defined as a Custom Resource (CR) file that the above GMC operator acts upon. It first checks if the microservices listed in the CR yaml file are running, if not it starts them and then proceeds to connect them. When the DocSum RAG pipeline is ready, the service endpoint details are returned, letting you use the application. Should you use "kubectl get pods" commands you will see all the component microservices, in particular embedding, retriever, rerank, and llm. The DocSum pipeline uses prebuilt images. 
-the image ghcr.io/huggingface/text-generation-inference:latest-intel-cpu. The service is called tgi-svc. Meanwhile, the Gaudi version launches the
+image ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu. The service is called tgi-svc. Meanwhile, the Gaudi version launches the
 service tgi-gaudi-svc, which uses the image ghcr.io/huggingface/tgi-gaudi:1.2.1. Both TGI model services serve the model specified in the LLM_MODEL_ID variable that is exported by you. In the below example we use Intel/neural-chat-7b-v3-3.

 [NOTE]
diff --git a/DocSum/kubernetes/manifests/xeon/docsum.yaml b/DocSum/kubernetes/manifests/xeon/docsum.yaml
index 06a73e07c..77f38b6ab 100644
--- a/DocSum/kubernetes/manifests/xeon/docsum.yaml
+++ b/DocSum/kubernetes/manifests/xeon/docsum.yaml
@@ -239,7 +239,7 @@ spec:
                 name: docsum-tgi-config
           securityContext:
             {}
-          image: "ghcr.io/huggingface/text-generation-inference:latest-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/DocSum/kubernetes/manifests/xeon/ui/react-docsum.yaml b/DocSum/kubernetes/manifests/xeon/ui/react-docsum.yaml
index 4f902a22a..61e8799b0 100644
--- a/DocSum/kubernetes/manifests/xeon/ui/react-docsum.yaml
+++ b/DocSum/kubernetes/manifests/xeon/ui/react-docsum.yaml
@@ -126,7 +126,7 @@ spec:
             - name: no_proxy
               value:
           securityContext: {}
-          image: "ghcr.io/huggingface/text-generation-inference:latest-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/FaqGen/docker/xeon/compose.yaml b/FaqGen/docker/xeon/compose.yaml
index d5d955984..8c5c894ae 100644
--- a/FaqGen/docker/xeon/compose.yaml
+++ b/FaqGen/docker/xeon/compose.yaml
@@ -3,7 +3,7 @@

 services:
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:latest-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
     container_name: tgi-xeon-server
     ports:
       - "8008:80"
diff --git a/FaqGen/kubernetes/manifests/xeon/ui/react-faqgen.yaml b/FaqGen/kubernetes/manifests/xeon/ui/react-faqgen.yaml
index bfb3be0b0..845ba5041 100644
--- a/FaqGen/kubernetes/manifests/xeon/ui/react-faqgen.yaml
+++ b/FaqGen/kubernetes/manifests/xeon/ui/react-faqgen.yaml
@@ -126,7 +126,7 @@ spec:
             - name: no_proxy
               value:
           securityContext: {}
-          image: "ghcr.io/huggingface/text-generation-inference:latest-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/SearchQnA/docker/xeon/compose.yaml b/SearchQnA/docker/xeon/compose.yaml
index 4dcf9b923..380eddb30 100644
--- a/SearchQnA/docker/xeon/compose.yaml
+++ b/SearchQnA/docker/xeon/compose.yaml
@@ -73,7 +73,7 @@ services:
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:latest-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
     container_name: tgi-service
     ports:
       - "3006:80"
diff --git a/SearchQnA/tests/test_searchqna_on_xeon.sh b/SearchQnA/tests/test_searchqna_on_xeon.sh
index 8c083dc9a..03255f4a1 100644
--- a/SearchQnA/tests/test_searchqna_on_xeon.sh
+++ b/SearchQnA/tests/test_searchqna_on_xeon.sh
@@ -23,7 +23,7 @@ function build_docker_images() {

     docker compose -f docker_build_compose.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
-    docker pull ghcr.io/huggingface/text-generation-inference:latest-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
     docker images
 }

diff --git a/Translation/docker/xeon/compose.yaml b/Translation/docker/xeon/compose.yaml
index 1ca7cee9d..4ba224bf3 100644
--- a/Translation/docker/xeon/compose.yaml
+++ b/Translation/docker/xeon/compose.yaml
@@ -3,7 +3,7 @@

 services:
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:latest-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
     container_name: tgi-service
     ports:
       - "8008:80"
diff --git a/VisualQnA/docker/xeon/README.md b/VisualQnA/docker/xeon/README.md
index 35aca85e2..73e30ab96 100644
--- a/VisualQnA/docker/xeon/README.md
+++ b/VisualQnA/docker/xeon/README.md
@@ -71,12 +71,12 @@ cd ../../../..
 ### 4. Pull TGI Xeon Image

 ```bash
-docker pull ghcr.io/huggingface/text-generation-inference:latest-intel-cpu
+docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
 ```

 Then run the command `docker images`, you will have the following 4 Docker Images:

-1. `ghcr.io/huggingface/text-generation-inference:latest-intel-cpu`
+1. `ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu`
 2. `opea/lvm-tgi:latest`
 3. `opea/visualqna:latest`
 4. `opea/visualqna-ui:latest`
diff --git a/VisualQnA/docker/xeon/compose.yaml b/VisualQnA/docker/xeon/compose.yaml
index 6bb9206b4..1fafa24ec 100644
--- a/VisualQnA/docker/xeon/compose.yaml
+++ b/VisualQnA/docker/xeon/compose.yaml
@@ -3,7 +3,7 @@

 services:
   llava-tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:latest-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
     container_name: tgi-llava-xeon-server
     ports:
       - "9399:80"
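
Reviewer note (not part of the patch): below is a minimal sketch of how one might sanity-check the pinned tag locally before merging. It assumes Docker is installed; the container name `tgi-pin-check` is hypothetical, the model id is simply the example the DocSum README above already uses, and the 8008:80 port mapping and /data cache mirror the compose files and manifests touched here.

```bash
# Pull the commit-pinned CPU build this patch standardizes on.
docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu

# Start it the way the compose files do (host 8008 -> container 80), caching
# model weights under ./data like the manifests' /data volumeMounts.
# "tgi-pin-check" is a hypothetical throwaway container name.
docker run -d --rm --name tgi-pin-check -p 8008:80 -v "$PWD/data:/data" \
  ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu \
  --model-id Intel/neural-chat-7b-v3-3

# Once the logs show the server is ready, exercise TGI's /generate endpoint,
# then clean up.
curl http://localhost:8008/generate -X POST -H 'Content-Type: application/json' \
  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64}}'
docker stop tgi-pin-check
```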