From a1798b5846f91971b2efb90dc89cb9730a9f4cbf Mon Sep 17 00:00:00 2001
From: dmsuehir
Date: Fri, 16 Aug 2024 09:49:29 -0700
Subject: [PATCH 1/2] Minor fixes for CodeGen Xeon and Gaudi Kubernetes
 codegen.yaml and doc updates

Signed-off-by: dmsuehir
---
 CodeGen/kubernetes/manifests/README.md          |  8 ++++++--
 CodeGen/kubernetes/manifests/gaudi/codegen.yaml | 14 ++++++++------
 CodeGen/kubernetes/manifests/xeon/codegen.yaml  | 12 ++++++------
 3 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/CodeGen/kubernetes/manifests/README.md b/CodeGen/kubernetes/manifests/README.md
index f6a0763726..4e0a0e0b69 100644
--- a/CodeGen/kubernetes/manifests/README.md
+++ b/CodeGen/kubernetes/manifests/README.md
@@ -6,7 +6,8 @@

 > You can also customize the "MODEL_ID" if needed.

-> You need to make sure you have created the directory `/mnt/opea-models` to save the cached model on the node where the CodeGEn workload is running. Otherwise, you need to modify the `codegen.yaml` file to change the `model-volume` to a directory that exists on the node.
+> You need to make sure you have created the directory `/mnt/opea-models` to save the cached model on the node where the CodeGen workload is running. Otherwise, you need to modify the `codegen.yaml` file to change the `model-volume` to a directory that exists on the node.
+> Alternatively, you can change the `codegen.yaml` to use a different type of volume, such as a persistent volume claim.

 ## Deploy On Xeon

@@ -30,10 +31,13 @@ kubectl apply -f codegen.yaml

 To verify the installation, run the command `kubectl get pod` to make sure all pods are running.

-Then run the command `kubectl port-forward svc/codegen 7778:7778` to expose the CodeGEn service for access.
+Then run the command `kubectl port-forward svc/codegen 7778:7778` to expose the CodeGen service for access.

 Open another terminal and run the following command to verify the service is working:

+> Note that it may take a couple of minutes for the service to be ready. If the `curl` command below fails, you
+> can check the logs of the codegen-tgi pod to see its status or check for errors.
+ ``` kubectl get pods curl http://localhost:7778/v1/codegen -H "Content-Type: application/json" -d '{ diff --git a/CodeGen/kubernetes/manifests/gaudi/codegen.yaml b/CodeGen/kubernetes/manifests/gaudi/codegen.yaml index 810c90f360..1471faf131 100644 --- a/CodeGen/kubernetes/manifests/gaudi/codegen.yaml +++ b/CodeGen/kubernetes/manifests/gaudi/codegen.yaml @@ -17,9 +17,9 @@ data: TGI_LLM_ENDPOINT: "http://codegen-tgi" HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" HF_HOME: "/tmp/.cache/huggingface" - http_proxy: - https_proxy: - no_proxy: + http_proxy: "" + https_proxy: "" + no_proxy: "" LANGCHAIN_TRACING_V2: "false" LANGCHAIN_API_KEY: insert-your-langchain-key-here LANGCHAIN_PROJECT: "opea-llm-uservice" @@ -45,9 +45,9 @@ data: HF_TOKEN: "insert-your-huggingface-token-here" MAX_INPUT_TOKENS: "1024" MAX_TOTAL_TOKENS: "4096" - http_proxy: - https_proxy: - no_proxy: + http_proxy: "" + https_proxy: "" + no_proxy: "" HABANA_LOGS: "/tmp/habana_logs" NUMBA_CACHE_DIR: "/tmp" TRANSFORMERS_CACHE: "/tmp/transformers_cache" @@ -243,6 +243,8 @@ spec: resources: limits: habana.ai/gaudi: 1 + memory: 64Gi + hugepages-2Mi: 4400Mi volumes: - name: model-volume hostPath: diff --git a/CodeGen/kubernetes/manifests/xeon/codegen.yaml b/CodeGen/kubernetes/manifests/xeon/codegen.yaml index 06b8a75445..9edadc88c0 100644 --- a/CodeGen/kubernetes/manifests/xeon/codegen.yaml +++ b/CodeGen/kubernetes/manifests/xeon/codegen.yaml @@ -17,9 +17,9 @@ data: TGI_LLM_ENDPOINT: "http://codegen-tgi" HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" HF_HOME: "/tmp/.cache/huggingface" - http_proxy: - https_proxy: - no_proxy: + http_proxy: "" + https_proxy: "" + no_proxy: "" LANGCHAIN_TRACING_V2: "false" LANGCHAIN_API_KEY: insert-your-langchain-key-here LANGCHAIN_PROJECT: "opea-llm-uservice" @@ -45,9 +45,9 @@ data: HF_TOKEN: "insert-your-huggingface-token-here" MAX_INPUT_TOKENS: "1024" MAX_TOTAL_TOKENS: "4096" - http_proxy: - https_proxy: - no_proxy: + http_proxy: "" + https_proxy: "" + no_proxy: "" HABANA_LOGS: "/tmp/habana_logs" NUMBA_CACHE_DIR: "/tmp" TRANSFORMERS_CACHE: "/tmp/transformers_cache" From 3833e52b2299258d008322b8a4bac4228b3037fc Mon Sep 17 00:00:00 2001 From: dmsuehir Date: Tue, 20 Aug 2024 09:00:22 -0700 Subject: [PATCH 2/2] Reduce hugepages Signed-off-by: dmsuehir --- CodeGen/kubernetes/manifests/gaudi/codegen.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CodeGen/kubernetes/manifests/gaudi/codegen.yaml b/CodeGen/kubernetes/manifests/gaudi/codegen.yaml index 2e9e7773bb..b671594caf 100644 --- a/CodeGen/kubernetes/manifests/gaudi/codegen.yaml +++ b/CodeGen/kubernetes/manifests/gaudi/codegen.yaml @@ -272,7 +272,7 @@ spec: limits: habana.ai/gaudi: 1 memory: 64Gi - hugepages-2Mi: 4400Mi + hugepages-2Mi: 500Mi volumes: - name: model-volume hostPath: @@ -343,7 +343,7 @@ spec: # initialDelaySeconds: 5 # periodSeconds: 5 # failureThreshold: 120 - CodeGen/kubernetes/manifests/gaudi/codegen.yaml # livenessProbe: + # livenessProbe: # httpGet: # path: / # port: 7778
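
The persistent volume claim alternative added to the README in patch 1 is not spelled out in the diff. A minimal sketch of that route is below; the claim name `opea-models-pvc`, the 50Gi request, and use of the cluster's default storage class are illustrative assumptions, not part of this patch.

```
# Hypothetical PVC for the model cache; the name, size, and storage class are
# assumptions -- adjust them for your cluster before applying.
kubectl apply -f - <<'EOF'
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: opea-models-pvc
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 50Gi
EOF
```

With the claim in place, the `model-volume` entry in `codegen.yaml` would reference `persistentVolumeClaim.claimName: opea-models-pvc` instead of the `/mnt/opea-models` hostPath.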
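To go with the readiness note added to the README, one way to check on the TGI pod before retrying the `curl` is sketched below. The `deploy/codegen-tgi` target is an assumption based on the `codegen-tgi` service name in `TGI_LLM_ENDPOINT`; adjust it if the Deployment is named differently.

```
# Confirm all pods reach Running/Ready; the model download can take a few minutes.
kubectl get pods

# Tail the TGI server logs to watch startup progress or surface errors; the
# Deployment name is assumed to match the codegen-tgi service.
kubectl logs deploy/codegen-tgi --tail=50 -f
```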