diff --git a/AgentQnA/docker_compose/amd/gpu/rocm/README.md b/AgentQnA/docker_compose/amd/gpu/rocm/README.md
index fe5253ed07..5a74979c89 100644
--- a/AgentQnA/docker_compose/amd/gpu/rocm/README.md
+++ b/AgentQnA/docker_compose/amd/gpu/rocm/README.md
@@ -64,7 +64,7 @@ We remind you that when using a specific version of the code, you need to use th
 - #### Optional. Pull TGI Docker Image (Do this if you want to use TGI)
 
   ```bash
-  docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+  docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
   ```
 
 - #### Build Docker Images
@@ -110,7 +110,7 @@ We remind you that when using a specific version of the code, you need to use th
 
   ##### TGI-based application:
 
-  - ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+  - ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
   - opea/agent:latest
   - redis/redis-stack:7.2.0-v9
   - ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
diff --git a/AgentQnA/docker_compose/amd/gpu/rocm/compose.yaml b/AgentQnA/docker_compose/amd/gpu/rocm/compose.yaml
index 4eab372dec..d674aaf0e6 100644
--- a/AgentQnA/docker_compose/amd/gpu/rocm/compose.yaml
+++ b/AgentQnA/docker_compose/amd/gpu/rocm/compose.yaml
@@ -2,7 +2,7 @@
 
 services:
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:3.0.0-rocm
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
     container_name: tgi-service
     ports:
       - "${TGI_SERVICE_PORT-8085}:80"
diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml b/AudioQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml
index d421f488fd..5fa82b3d8f 100644
--- a/AudioQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml
+++ b/AudioQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml
@@ -25,7 +25,7 @@ services:
       https_proxy: ${https_proxy}
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     container_name: tgi-service
     ports:
       - ${LLM_SERVER_PORT:-3006}:80
diff --git a/AvatarChatbot/docker_compose/intel/cpu/xeon/README.md b/AvatarChatbot/docker_compose/intel/cpu/xeon/README.md
index bf686ce99e..67d9d0b456 100644
--- a/AvatarChatbot/docker_compose/intel/cpu/xeon/README.md
+++ b/AvatarChatbot/docker_compose/intel/cpu/xeon/README.md
@@ -19,7 +19,7 @@ docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build
 
 ### 3. Build LLM Image
 
-Intel Xeon optimized image hosted in huggingface repo will be used for TGI service: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu (https://github.com/huggingface/text-generation-inference)
+The Intel Xeon optimized image hosted in the Hugging Face repo will be used for the TGI service: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu (https://github.com/huggingface/text-generation-inference)
 
 ### 4. Build TTS Image
 
diff --git a/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml b/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml
index f33449d020..c9748421a0 100644
--- a/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml
@@ -26,7 +26,7 @@ services:
       https_proxy: ${https_proxy}
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     container_name: tgi-service
     ports:
       - "3006:80"
diff --git a/AvatarChatbot/docker_compose/intel/hpu/gaudi/README.md b/AvatarChatbot/docker_compose/intel/hpu/gaudi/README.md
index 994d400ce4..ecd4bb5ec8 100644
--- a/AvatarChatbot/docker_compose/intel/hpu/gaudi/README.md
+++ b/AvatarChatbot/docker_compose/intel/hpu/gaudi/README.md
@@ -19,7 +19,7 @@ docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy -
 
 ### 3. Build LLM Image
 
-Intel Gaudi optimized image hosted in huggingface repo will be used for TGI service: ghcr.io/huggingface/tgi-gaudi:2.0.6 (https://github.com/huggingface/tgi-gaudi)
+The Intel Gaudi optimized image hosted in the Hugging Face repo will be used for the TGI service: ghcr.io/huggingface/tgi-gaudi:2.3.1 (https://github.com/huggingface/tgi-gaudi)
 
 ### 4. Build TTS Image
 
diff --git a/AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml b/AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml
index aba9bb910c..4123034856 100644
--- a/AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -38,7 +38,7 @@ services:
       - SYS_NICE
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
     container_name: tgi-gaudi-server
     ports:
       - "3006:80"
diff --git a/AvatarChatbot/tests/test_compose_on_gaudi.sh b/AvatarChatbot/tests/test_compose_on_gaudi.sh
index c9d693c415..6bf2b80bcc 100755
--- a/AvatarChatbot/tests/test_compose_on_gaudi.sh
+++ b/AvatarChatbot/tests/test_compose_on_gaudi.sh
@@ -36,7 +36,7 @@ function build_docker_images() {
     service_list="avatarchatbot whisper-gaudi speecht5-gaudi wav2lip-gaudi animation"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.3.1
 
     docker images && sleep 1s
 }
diff --git a/AvatarChatbot/tests/test_compose_on_rocm.sh b/AvatarChatbot/tests/test_compose_on_rocm.sh
index dab4564a2d..c8c49f6df1 100644
--- a/AvatarChatbot/tests/test_compose_on_rocm.sh
+++ b/AvatarChatbot/tests/test_compose_on_rocm.sh
@@ -34,7 +34,7 @@ function build_docker_images() {
     service_list="avatarchatbot whisper asr speecht5 tts wav2lip animation"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
 
     docker images && sleep 3s
 }
diff --git a/AvatarChatbot/tests/test_compose_on_xeon.sh b/AvatarChatbot/tests/test_compose_on_xeon.sh
index b0013aa2af..e572153cbb 100755
--- a/AvatarChatbot/tests/test_compose_on_xeon.sh
+++ b/AvatarChatbot/tests/test_compose_on_xeon.sh
@@ -36,7 +36,7 @@ function build_docker_images() {
     service_list="avatarchatbot whisper speecht5 wav2lip animation"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
 
     docker images && sleep 1s
 }
diff --git a/ChatQnA/docker_compose/amd/gpu/rocm/README.md b/ChatQnA/docker_compose/amd/gpu/rocm/README.md
index 4d968b84eb..c0ec637d37 100644
--- a/ChatQnA/docker_compose/amd/gpu/rocm/README.md
+++ b/ChatQnA/docker_compose/amd/gpu/rocm/README.md
@@ -165,7 +165,7 @@ eaf24161aca8   opea/nginx:latest                                       "/docker-
 05512bd29fee   opea/dataprep:latest                                    "sh -c 'python $( [ …"   37 seconds ago   Up 36 seconds (healthy)   0.0.0.0:18103->5000/tcp, [::]:18103->5000/tcp                                              chatqna-dataprep-service
 49844d339d1d   opea/retriever:latest                                   "python opea_retriev…"   37 seconds ago   Up 36 seconds             0.0.0.0:7000->7000/tcp, [::]:7000->7000/tcp                                                chatqna-retriever
 75b698fe7de0   ghcr.io/huggingface/text-embeddings-inference:cpu-1.5   "text-embeddings-rou…"   37 seconds ago   Up 36 seconds             0.0.0.0:18808->80/tcp, [::]:18808->80/tcp                                                  chatqna-tei-reranking-service
-342f01bfdbb2   ghcr.io/huggingface/text-generation-inference:2.3.1-rocm"python3 /workspace/…"   37 seconds ago   Up 36 seconds             0.0.0.0:18008->8011/tcp, [::]:18008->8011/tcp                                              chatqna-tgi-service
+342f01bfdbb2   ghcr.io/huggingface/text-generation-inference:2.4.1-rocm"python3 /workspace/…"   37 seconds ago   Up 36 seconds             0.0.0.0:18008->8011/tcp, [::]:18008->8011/tcp                                              chatqna-tgi-service
 6081eb1c119d   redis/redis-stack:7.2.0-v9                              "/entrypoint.sh"         37 seconds ago   Up 36 seconds             0.0.0.0:6379->6379/tcp, [::]:6379->6379/tcp, 0.0.0.0:8001->8001/tcp, [::]:8001->8001/tcp   chatqna-redis-vector-db
 eded17420782   ghcr.io/huggingface/text-embeddings-inference:cpu-1.5   "text-embeddings-rou…"   37 seconds ago   Up 36 seconds             0.0.0.0:18090->80/tcp, [::]:18090->80/tcp                                                  chatqna-tei-embedding-service
 ```
@@ -181,7 +181,7 @@ e0ef1ea67640   opea/llm-faqgen:latest                                  "bash ent
 05512bd29fee   opea/dataprep:latest                                    "sh -c 'python $( [ …"   37 seconds ago   Up 36 seconds (healthy)   0.0.0.0:18103->5000/tcp, [::]:18103->5000/tcp                                              chatqna-dataprep-service
 49844d339d1d   opea/retriever:latest                                   "python opea_retriev…"   37 seconds ago   Up 36 seconds             0.0.0.0:7000->7000/tcp, [::]:7000->7000/tcp                                                chatqna-retriever
 75b698fe7de0   ghcr.io/huggingface/text-embeddings-inference:cpu-1.5   "text-embeddings-rou…"   37 seconds ago   Up 36 seconds             0.0.0.0:18808->80/tcp, [::]:18808->80/tcp                                                  chatqna-tei-reranking-service
-342f01bfdbb2   ghcr.io/huggingface/text-generation-inference:2.3.1-rocm"python3 /workspace/…"   37 seconds ago   Up 36 seconds             0.0.0.0:18008->8011/tcp, [::]:18008->8011/tcp                                              chatqna-tgi-service
+342f01bfdbb2   ghcr.io/huggingface/text-generation-inference:2.4.1-rocm"python3 /workspace/…"   37 seconds ago   Up 36 seconds             0.0.0.0:18008->8011/tcp, [::]:18008->8011/tcp                                              chatqna-tgi-service
 6081eb1c119d   redis/redis-stack:7.2.0-v9                              "/entrypoint.sh"         37 seconds ago   Up 36 seconds             0.0.0.0:6379->6379/tcp, [::]:6379->6379/tcp, 0.0.0.0:8001->8001/tcp, [::]:8001->8001/tcp   chatqna-redis-vector-db
 eded17420782   ghcr.io/huggingface/text-embeddings-inference:cpu-1.5   "text-embeddings-rou…"   37 seconds ago   Up 36 seconds             0.0.0.0:18090->80/tcp, [::]:18090->80/tcp                                                  chatqna-tei-embedding-service
 ```
diff --git a/ChatQnA/docker_compose/amd/gpu/rocm/compose.yaml b/ChatQnA/docker_compose/amd/gpu/rocm/compose.yaml
index 14f2eb3312..33f9770629 100644
--- a/ChatQnA/docker_compose/amd/gpu/rocm/compose.yaml
+++ b/ChatQnA/docker_compose/amd/gpu/rocm/compose.yaml
@@ -85,7 +85,7 @@ services:
     command: --model-id ${CHATQNA_RERANK_MODEL_ID} --auto-truncate
 
   chatqna-tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
     container_name: chatqna-tgi-service
     ports:
       - "${CHATQNA_TGI_SERVICE_PORT}:80"
diff --git a/ChatQnA/docker_compose/amd/gpu/rocm/compose_faqgen.yaml b/ChatQnA/docker_compose/amd/gpu/rocm/compose_faqgen.yaml
index df2a9a42a3..9ffd813134 100644
--- a/ChatQnA/docker_compose/amd/gpu/rocm/compose_faqgen.yaml
+++ b/ChatQnA/docker_compose/amd/gpu/rocm/compose_faqgen.yaml
@@ -85,7 +85,7 @@ services:
     command: --model-id ${CHATQNA_RERANK_MODEL_ID} --auto-truncate
 
   chatqna-tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
     container_name: chatqna-tgi-service
     ports:
       - "${CHATQNA_TGI_SERVICE_PORT}:80"
diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_faqgen_tgi.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_faqgen_tgi.yaml
index a66be60327..18410bf071 100644
--- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_faqgen_tgi.yaml
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_faqgen_tgi.yaml
@@ -81,7 +81,7 @@ services:
       HF_HUB_ENABLE_HF_TRANSFER: 0
     command: --model-id ${RERANK_MODEL_ID} --auto-truncate
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     container_name: tgi-server
     ports:
       - ${LLM_ENDPOINT_PORT:-9009}:80
diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml
index 434ae34eac..4a509ced70 100644
--- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml
@@ -81,7 +81,7 @@ services:
       HF_HUB_ENABLE_HF_TRANSFER: 0
     command: --model-id ${RERANK_MODEL_ID} --auto-truncate
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     container_name: tgi-service
     ports:
       - "9009:80"
diff --git a/ChatQnA/kubernetes/gmc/README.md b/ChatQnA/kubernetes/gmc/README.md
index 5775d14b88..29e69d6b21 100644
--- a/ChatQnA/kubernetes/gmc/README.md
+++ b/ChatQnA/kubernetes/gmc/README.md
@@ -18,7 +18,7 @@ The ChatQnA uses the below prebuilt images if you choose a Xeon deployment
 - tei_embedding_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.6
 - retriever: opea/retriever:latest
 - tei_xeon_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.6
-- tgi-service: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+- tgi-service: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
 - chaqna-xeon-backend-server: opea/chatqna:latest
 
 Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services.
diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml
index eec356dd8c..2e25274644 100644
--- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml
@@ -4,7 +4,7 @@
 services:
 
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     container_name: tgi-server
     profiles:
       - codegen-xeon-tgi
diff --git a/CodeGen/tests/test_compose_on_rocm.sh b/CodeGen/tests/test_compose_on_rocm.sh
index 94f006e358..6c7031456e 100644
--- a/CodeGen/tests/test_compose_on_rocm.sh
+++ b/CodeGen/tests/test_compose_on_rocm.sh
@@ -29,7 +29,7 @@ function build_docker_images() {
     service_list="codegen codegen-ui llm-textgen"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
     docker images && sleep 1s
 }
 
diff --git a/CodeGen/tests/test_compose_on_xeon.sh b/CodeGen/tests/test_compose_on_xeon.sh
index 4aaa180ec3..9596aed5af 100644
--- a/CodeGen/tests/test_compose_on_xeon.sh
+++ b/CodeGen/tests/test_compose_on_xeon.sh
@@ -46,7 +46,7 @@ function build_docker_images() {
 
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     docker images && sleep 1s
 }
 
diff --git a/CodeTrans/docker_compose/amd/gpu/rocm/README.md b/CodeTrans/docker_compose/amd/gpu/rocm/README.md
index 9ea891b496..d33364e2b2 100644
--- a/CodeTrans/docker_compose/amd/gpu/rocm/README.md
+++ b/CodeTrans/docker_compose/amd/gpu/rocm/README.md
@@ -150,7 +150,7 @@ eaf24161aca8   opea/nginx:latest                                       "/docker-
 2fce48a4c0f4   opea/codetrans-ui:latest                                  "docker-entrypoint.s…"   37 seconds ago   Up 5 seconds              0.0.0.0:18101->5173/tcp, [::]:18101->5173/tcp                                              codetrans-ui-server
 613c384979f4   opea/codetrans:latest                                     "bash entrypoint.sh"     37 seconds ago   Up 5 seconds              0.0.0.0:18102->8888/tcp, [::]:18102->8888/tcp                                              codetrans-backend-server
 e0ef1ea67640   opea/llm-textgen:latest                                  "bash entrypoint.sh"     37 seconds ago   Up 36 seconds             0.0.0.0:18011->9000/tcp, [::]:18011->9000/tcp                                              codetrans-llm-server
-342f01bfdbb2   ghcr.io/huggingface/text-generation-inference:2.3.1-rocm"python3 /workspace/…"   37 seconds ago   Up 36 seconds             0.0.0.0:18008->8011/tcp, [::]:18008->8011/tcp                                              codetrans-tgi-service
+342f01bfdbb2   ghcr.io/huggingface/text-generation-inference:2.4.1-rocm"python3 /workspace/…"   37 seconds ago   Up 36 seconds             0.0.0.0:18008->8011/tcp, [::]:18008->8011/tcp                                              codetrans-tgi-service
 ```
 
 if used vLLM:
diff --git a/CodeTrans/docker_compose/intel/cpu/xeon/compose_tgi.yaml b/CodeTrans/docker_compose/intel/cpu/xeon/compose_tgi.yaml
index 77c668241c..27b726f8cc 100644
--- a/CodeTrans/docker_compose/intel/cpu/xeon/compose_tgi.yaml
+++ b/CodeTrans/docker_compose/intel/cpu/xeon/compose_tgi.yaml
@@ -3,7 +3,7 @@
 
 services:
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     container_name: codetrans-xeon-tgi-service
     ports:
       - "8008:80"
diff --git a/CodeTrans/docker_compose/intel/hpu/gaudi/compose_tgi.yaml b/CodeTrans/docker_compose/intel/hpu/gaudi/compose_tgi.yaml
index 9bcc01f318..023eed2adf 100644
--- a/CodeTrans/docker_compose/intel/hpu/gaudi/compose_tgi.yaml
+++ b/CodeTrans/docker_compose/intel/hpu/gaudi/compose_tgi.yaml
@@ -3,7 +3,7 @@
 
 services:
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
     container_name: codetrans-gaudi-tgi-service
     ports:
       - "8008:80"
diff --git a/CodeTrans/tests/test_compose_tgi_on_gaudi.sh b/CodeTrans/tests/test_compose_tgi_on_gaudi.sh
index 051afce9d4..e1e0a4d3e6 100644
--- a/CodeTrans/tests/test_compose_tgi_on_gaudi.sh
+++ b/CodeTrans/tests/test_compose_tgi_on_gaudi.sh
@@ -29,7 +29,7 @@ function build_docker_images() {
     service_list="codetrans codetrans-ui llm-textgen nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.3.1
     docker images && sleep 1s
 }
 
diff --git a/CodeTrans/tests/test_compose_tgi_on_xeon.sh b/CodeTrans/tests/test_compose_tgi_on_xeon.sh
index 00da9bde73..cb51c13867 100644
--- a/CodeTrans/tests/test_compose_tgi_on_xeon.sh
+++ b/CodeTrans/tests/test_compose_tgi_on_xeon.sh
@@ -29,7 +29,7 @@ function build_docker_images() {
     service_list="codetrans codetrans-ui llm-textgen nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     docker images && sleep 1s
 }
 
diff --git a/DBQnA/docker_compose/intel/cpu/xeon/compose.yaml b/DBQnA/docker_compose/intel/cpu/xeon/compose.yaml
index b96a71d01d..ebfe1f8dec 100644
--- a/DBQnA/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/DBQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -3,7 +3,7 @@
 
 services:
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     container_name: tgi-service
     ports:
       - "8008:80"
diff --git a/DBQnA/tests/test_compose_on_xeon.sh b/DBQnA/tests/test_compose_on_xeon.sh
index c410cc48f8..c3255d484b 100755
--- a/DBQnA/tests/test_compose_on_xeon.sh
+++ b/DBQnA/tests/test_compose_on_xeon.sh
@@ -23,7 +23,7 @@ function build_docker_images() {
     echo "Build all the images with --no-cache, check docker_image_build.log for details..."
     docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     docker images && sleep 1s
 }
 
diff --git a/DocSum/docker_compose/intel/cpu/xeon/compose_tgi.yaml b/DocSum/docker_compose/intel/cpu/xeon/compose_tgi.yaml
index 4b0362bd09..8ab5652b9e 100644
--- a/DocSum/docker_compose/intel/cpu/xeon/compose_tgi.yaml
+++ b/DocSum/docker_compose/intel/cpu/xeon/compose_tgi.yaml
@@ -3,7 +3,7 @@
 
 services:
   tgi-server:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     container_name: docsum-xeon-tgi-server
     ports:
       - ${LLM_ENDPOINT_PORT:-8008}:80
diff --git a/DocSum/kubernetes/gmc/README.md b/DocSum/kubernetes/gmc/README.md
index aaab01a8c8..e6175f1587 100644
--- a/DocSum/kubernetes/gmc/README.md
+++ b/DocSum/kubernetes/gmc/README.md
@@ -8,7 +8,7 @@ Install GMC in your Kubernetes cluster, if you have not already done so, by foll
 The DocSum application is defined as a Custom Resource (CR) file that the above GMC operator acts upon. It first checks if the microservices listed in the CR yaml file are running, if not it starts them and then proceeds to connect them. When the DocSum RAG pipeline is ready, the service endpoint details are returned, letting you use the application. Should you use "kubectl get pods" commands you will see all the component microservices, in particular embedding, retriever, rerank, and llm.
 
 The DocSum pipeline uses  prebuilt images. The Xeon version uses the prebuilt image `llm-docsum-tgi:latest` which internally leverages the
-the image `ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu`. The service is called tgi-svc. Meanwhile, the Gaudi version launches the
+image `ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu`. The service is called tgi-svc. Meanwhile, the Gaudi version launches the
 service tgi-gaudi-svc, which uses the image `ghcr.io/huggingface/tgi-gaudi:2.3.1`. Both TGI model services serve the model specified in the LLM_MODEL_ID variable that is exported by you. In the below example we use `Intel/neural-chat-7b-v3-3`.
 
 [NOTE]
diff --git a/DocSum/tests/test_compose_on_rocm.sh b/DocSum/tests/test_compose_on_rocm.sh
index ee95ffc0be..1ce4f64734 100644
--- a/DocSum/tests/test_compose_on_rocm.sh
+++ b/DocSum/tests/test_compose_on_rocm.sh
@@ -30,7 +30,7 @@ function build_docker_images() {
     service_list="docsum docsum-gradio-ui whisper llm-docsum"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
     docker images && sleep 3s
 }
 
diff --git a/DocSum/tests/test_compose_tgi_on_xeon.sh b/DocSum/tests/test_compose_tgi_on_xeon.sh
index 4ac895d7a0..a9d83fc54b 100644
--- a/DocSum/tests/test_compose_tgi_on_xeon.sh
+++ b/DocSum/tests/test_compose_tgi_on_xeon.sh
@@ -39,7 +39,7 @@ function build_docker_images() {
     service_list="docsum docsum-gradio-ui whisper llm-docsum"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/text-generation-inference:1.4
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     docker images && sleep 1s
 }
 
diff --git a/DocSum/tests/test_compose_vllm_on_rocm.sh b/DocSum/tests/test_compose_vllm_on_rocm.sh
index 2eb360f178..744280d4e0 100644
--- a/DocSum/tests/test_compose_vllm_on_rocm.sh
+++ b/DocSum/tests/test_compose_vllm_on_rocm.sh
@@ -30,7 +30,7 @@ function build_docker_images() {
     service_list="docsum docsum-gradio-ui whisper llm-docsum vllm-rocm"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
     docker images && sleep 3s
 }
 
diff --git a/MultimodalQnA/docker_compose/amd/gpu/rocm/README.md b/MultimodalQnA/docker_compose/amd/gpu/rocm/README.md
index 14e66d989a..8489ab366e 100644
--- a/MultimodalQnA/docker_compose/amd/gpu/rocm/README.md
+++ b/MultimodalQnA/docker_compose/amd/gpu/rocm/README.md
@@ -77,7 +77,7 @@ After launching your instance, you can connect to it using SSH (for Linux instan
 - #### Optional. Pull TGI Docker Image (Do this if you want to use TGI)
 
   ```bash
-  docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+  docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
   ```
 
 - #### Build Docker Images
diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
index 00a16c1670..99d8c7b551 100644
--- a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
@@ -100,7 +100,7 @@ services:
       timeout: 10s
       retries: 60
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     container_name: tgi-service
     ports:
       - "9009:80"
@@ -156,7 +156,7 @@ services:
     ipc: host
     restart: always
   tgi_service_codegen:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     container_name: tgi_service_codegen
     ports:
       - "8028:80"
diff --git a/ProductivitySuite/tests/test_compose_on_xeon.sh b/ProductivitySuite/tests/test_compose_on_xeon.sh
index 57f903ed95..cd2feb3dc5 100755
--- a/ProductivitySuite/tests/test_compose_on_xeon.sh
+++ b/ProductivitySuite/tests/test_compose_on_xeon.sh
@@ -23,7 +23,7 @@ function build_docker_images() {
     docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log
 
     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.6
-    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     docker images && sleep 1s
 }
 
diff --git a/SearchQnA/docker_compose/intel/cpu/xeon/compose.yaml b/SearchQnA/docker_compose/intel/cpu/xeon/compose.yaml
index 4503a645bb..9b692a3d95 100644
--- a/SearchQnA/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/SearchQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -91,7 +91,7 @@ services:
       LOGFLAG: ${LOGFLAG}
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     container_name: tgi-service
     ports:
       - "3006:80"
diff --git a/SearchQnA/tests/test_compose_on_xeon.sh b/SearchQnA/tests/test_compose_on_xeon.sh
index fb5cfaa469..89e4b8e5f9 100644
--- a/SearchQnA/tests/test_compose_on_xeon.sh
+++ b/SearchQnA/tests/test_compose_on_xeon.sh
@@ -36,7 +36,7 @@ function build_docker_images() {
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.6
-    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     docker images && sleep 1s
 }
 
diff --git a/Translation/docker_compose/intel/cpu/xeon/README.md b/Translation/docker_compose/intel/cpu/xeon/README.md
index 095ca54c38..df5d3214a0 100644
--- a/Translation/docker_compose/intel/cpu/xeon/README.md
+++ b/Translation/docker_compose/intel/cpu/xeon/README.md
@@ -85,7 +85,7 @@ CONTAINER ID   IMAGE                                                           C
 68b8b86a737e   opea/translation-ui:latest                                      "docker-entrypoint.s…"   7 minutes ago   Up About a minute        0.0.0.0:5173->5173/tcp, :::5173->5173/tcp   translation-xeon-ui-server
 8400903275b5   opea/translation:latest                                         "python translation.…"   7 minutes ago   Up About a minute        0.0.0.0:8888->8888/tcp, :::8888->8888/tcp   translation-xeon-backend-server
 2da5545cb18c   opea/llm-textgen:latest                                         "bash entrypoint.sh"     7 minutes ago   Up About a minute        0.0.0.0:9000->9000/tcp, :::9000->9000/tcp   llm-textgen-server
-dee02c1fb538   ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu   "text-generation-lau…"   7 minutes ago   Up 7 minutes (healthy)   0.0.0.0:8008->80/tcp, [::]:8008->80/tcp     tgi-service
+dee02c1fb538   ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu   "text-generation-lau…"   7 minutes ago   Up 7 minutes (healthy)   0.0.0.0:8008->80/tcp, [::]:8008->80/tcp     tgi-service
 ```
 
 ### Test the Pipeline
@@ -125,7 +125,7 @@ The compose.yaml is default compose file using tgi as serving framework
 
 | Service Name                    | Image Name                                                    |
 | ------------------------------- | ------------------------------------------------------------- |
-| tgi-service                     | ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu |
+| tgi-service                     | ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu |
 | llm                             | opea/llm-textgen:latest                                       |
 | translation-xeon-backend-server | opea/translation:latest                                       |
 | translation-xeon-ui-server      | opea/translation-ui:latest                                    |
@@ -137,7 +137,7 @@ The table provides a comprehensive overview of the Translation service utilized
 
 | Service Name                    | Possible Image Names                                          | Optional | Description                                                                                     |
 | ------------------------------- | ------------------------------------------------------------- | -------- | ----------------------------------------------------------------------------------------------- |
-| tgi-service                     | ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu | No       | Specific to the TGI deployment, focuses on text generation inference using Xeon hardware.       |
+| tgi-service                     | ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu | No       | Specific to the TGI deployment, focuses on text generation inference using Xeon hardware.       |
 | llm                             | opea/llm-textgen:latest                                       | No       | Handles large language model (LLM) tasks                                                        |
 | translation-xeon-backend-server | opea/translation:latest                                       | No       | Serves as the backend for the Translation service, with variations depending on the deployment. |
 | translation-xeon-ui-server      | opea/translation-ui:latest                                    | No       | Provides the user interface for the Translation service.                                        |
diff --git a/Translation/docker_compose/intel/cpu/xeon/compose.yaml b/Translation/docker_compose/intel/cpu/xeon/compose.yaml
index 4b77d84484..aeb94f8fdd 100644
--- a/Translation/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/Translation/docker_compose/intel/cpu/xeon/compose.yaml
@@ -3,7 +3,7 @@
 
 services:
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     container_name: tgi-service
     ports:
       - "8008:80"
diff --git a/Translation/tests/test_compose_on_xeon.sh b/Translation/tests/test_compose_on_xeon.sh
index 7eeec8c7a0..8195ea5b3a 100644
--- a/Translation/tests/test_compose_on_xeon.sh
+++ b/Translation/tests/test_compose_on_xeon.sh
@@ -35,7 +35,7 @@ function build_docker_images() {
     service_list="translation translation-ui llm-textgen nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     docker images && sleep 1s
 }
 
diff --git a/VisualQnA/docker_compose/amd/gpu/rocm/README.md b/VisualQnA/docker_compose/amd/gpu/rocm/README.md
index 1647b16b2a..9a582c9dbf 100644
--- a/VisualQnA/docker_compose/amd/gpu/rocm/README.md
+++ b/VisualQnA/docker_compose/amd/gpu/rocm/README.md
@@ -71,7 +71,7 @@
 - #### Optional. Pull TGI Docker Image (Do this if you want to use TGI)
 
   ```bash
-  docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+  docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
   ```
 
 - #### Build Docker Images
diff --git a/VisualQnA/docker_compose/intel/cpu/xeon/README.md b/VisualQnA/docker_compose/intel/cpu/xeon/README.md
index cfbc3ab1c1..35524d99ed 100644
--- a/VisualQnA/docker_compose/intel/cpu/xeon/README.md
+++ b/VisualQnA/docker_compose/intel/cpu/xeon/README.md
@@ -48,13 +48,13 @@ docker build --no-cache -t opea/visualqna-ui:latest --build-arg https_proxy=$htt
 # vLLM
 docker pull opea/vllm:latest
 # TGI (Optional)
-docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
 ```
 
 Then run the command `docker images`, you will have the following Docker Images:
 
 1. `opea/vllm:latest`
-2. `ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu` (Optional)
+2. `ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu` (Optional)
 3. `opea/lvm:latest`
 4. `opea/visualqna:latest`
 5. `opea/visualqna-ui:latest`
diff --git a/VisualQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml b/VisualQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml
index b595bdcba7..5bacf1108d 100644
--- a/VisualQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml
+++ b/VisualQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml
@@ -3,7 +3,7 @@
 
 services:
   llava-tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     container_name: tgi-llava-xeon-server
     ports:
       - "8399:80"
diff --git a/VisualQnA/tests/test_compose_tgi_on_xeon.sh b/VisualQnA/tests/test_compose_tgi_on_xeon.sh
index 29a009904d..8ef6aadb6a 100644
--- a/VisualQnA/tests/test_compose_tgi_on_xeon.sh
+++ b/VisualQnA/tests/test_compose_tgi_on_xeon.sh
@@ -28,6 +28,8 @@ function build_docker_images() {
     service_list="visualqna visualqna-ui lvm nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
+
     docker images && sleep 1s
 }