From a1798b5846f91971b2efb90dc89cb9730a9f4cbf Mon Sep 17 00:00:00 2001
From: dmsuehir
Date: Fri, 16 Aug 2024 09:49:29 -0700
Subject: [PATCH 1/2] Minor fixes for CodeGen Xeon and Gaudi Kubernetes
 codegen.yaml and doc updates

Signed-off-by: dmsuehir
---
 CodeGen/kubernetes/manifests/README.md          |  8 ++++++--
 CodeGen/kubernetes/manifests/gaudi/codegen.yaml | 14 ++++++++------
 CodeGen/kubernetes/manifests/xeon/codegen.yaml  | 12 ++++++------
 3 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/CodeGen/kubernetes/manifests/README.md b/CodeGen/kubernetes/manifests/README.md
index f6a0763726..4e0a0e0b69 100644
--- a/CodeGen/kubernetes/manifests/README.md
+++ b/CodeGen/kubernetes/manifests/README.md
@@ -6,7 +6,8 @@

 > You can also customize the "MODEL_ID" if needed.

-> You need to make sure you have created the directory `/mnt/opea-models` to save the cached model on the node where the CodeGEn workload is running. Otherwise, you need to modify the `codegen.yaml` file to change the `model-volume` to a directory that exists on the node.
+> You need to make sure you have created the directory `/mnt/opea-models` to save the cached model on the node where the CodeGen workload is running. Otherwise, you need to modify the `codegen.yaml` file to change the `model-volume` to a directory that exists on the node.
+> Alternatively, you can change the `codegen.yaml` to use a different type of volume, such as a persistent volume claim.

 ## Deploy On Xeon

@@ -30,10 +31,13 @@ kubectl apply -f codegen.yaml

 To verify the installation, run the command `kubectl get pod` to make sure all pods are running.

-Then run the command `kubectl port-forward svc/codegen 7778:7778` to expose the CodeGEn service for access.
+Then run the command `kubectl port-forward svc/codegen 7778:7778` to expose the CodeGen service for access.

 Open another terminal and run the following command to verify the service is working:

+> Note that it may take a couple of minutes for the service to be ready. If the `curl` command below fails, you
+> can check the logs of the codegen-tgi pod to see its status or check for errors.
+ ``` kubectl get pods curl http://localhost:7778/v1/codegen -H "Content-Type: application/json" -d '{ diff --git a/CodeGen/kubernetes/manifests/gaudi/codegen.yaml b/CodeGen/kubernetes/manifests/gaudi/codegen.yaml index 810c90f360..1471faf131 100644 --- a/CodeGen/kubernetes/manifests/gaudi/codegen.yaml +++ b/CodeGen/kubernetes/manifests/gaudi/codegen.yaml @@ -17,9 +17,9 @@ data: TGI_LLM_ENDPOINT: "http://codegen-tgi" HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" HF_HOME: "/tmp/.cache/huggingface" - http_proxy: - https_proxy: - no_proxy: + http_proxy: "" + https_proxy: "" + no_proxy: "" LANGCHAIN_TRACING_V2: "false" LANGCHAIN_API_KEY: insert-your-langchain-key-here LANGCHAIN_PROJECT: "opea-llm-uservice" @@ -45,9 +45,9 @@ data: HF_TOKEN: "insert-your-huggingface-token-here" MAX_INPUT_TOKENS: "1024" MAX_TOTAL_TOKENS: "4096" - http_proxy: - https_proxy: - no_proxy: + http_proxy: "" + https_proxy: "" + no_proxy: "" HABANA_LOGS: "/tmp/habana_logs" NUMBA_CACHE_DIR: "/tmp" TRANSFORMERS_CACHE: "/tmp/transformers_cache" @@ -243,6 +243,8 @@ spec: resources: limits: habana.ai/gaudi: 1 + memory: 64Gi + hugepages-2Mi: 4400Mi volumes: - name: model-volume hostPath: diff --git a/CodeGen/kubernetes/manifests/xeon/codegen.yaml b/CodeGen/kubernetes/manifests/xeon/codegen.yaml index 06b8a75445..9edadc88c0 100644 --- a/CodeGen/kubernetes/manifests/xeon/codegen.yaml +++ b/CodeGen/kubernetes/manifests/xeon/codegen.yaml @@ -17,9 +17,9 @@ data: TGI_LLM_ENDPOINT: "http://codegen-tgi" HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" HF_HOME: "/tmp/.cache/huggingface" - http_proxy: - https_proxy: - no_proxy: + http_proxy: "" + https_proxy: "" + no_proxy: "" LANGCHAIN_TRACING_V2: "false" LANGCHAIN_API_KEY: insert-your-langchain-key-here LANGCHAIN_PROJECT: "opea-llm-uservice" @@ -45,9 +45,9 @@ data: HF_TOKEN: "insert-your-huggingface-token-here" MAX_INPUT_TOKENS: "1024" MAX_TOTAL_TOKENS: "4096" - http_proxy: - https_proxy: - no_proxy: + http_proxy: "" + https_proxy: "" + no_proxy: "" HABANA_LOGS: "/tmp/habana_logs" NUMBA_CACHE_DIR: "/tmp" TRANSFORMERS_CACHE: "/tmp/transformers_cache" From 3833e52b2299258d008322b8a4bac4228b3037fc Mon Sep 17 00:00:00 2001 From: dmsuehir Date: Tue, 20 Aug 2024 09:00:22 -0700 Subject: [PATCH 2/2] Reduce hugepages Signed-off-by: dmsuehir --- CodeGen/kubernetes/manifests/gaudi/codegen.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CodeGen/kubernetes/manifests/gaudi/codegen.yaml b/CodeGen/kubernetes/manifests/gaudi/codegen.yaml index 2e9e7773bb..b671594caf 100644 --- a/CodeGen/kubernetes/manifests/gaudi/codegen.yaml +++ b/CodeGen/kubernetes/manifests/gaudi/codegen.yaml @@ -272,7 +272,7 @@ spec: limits: habana.ai/gaudi: 1 memory: 64Gi - hugepages-2Mi: 4400Mi + hugepages-2Mi: 500Mi volumes: - name: model-volume hostPath: @@ -343,7 +343,7 @@ spec: # initialDelaySeconds: 5 # periodSeconds: 5 # failureThreshold: 120 - CodeGen/kubernetes/manifests/gaudi/codegen.yaml # livenessProbe: + # livenessProbe: # httpGet: # path: / # port: 7778
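
The persistent volume claim alternative added to the README in patch 1 is not spelled out in the diff. A minimal sketch of that route is below; the claim name `opea-models-pvc`, the 50Gi request, and use of the cluster's default storage class are illustrative assumptions, not part of this patch.

```
# Hypothetical PVC for the model cache; the name, size, and storage class are
# assumptions -- adjust them for your cluster before applying.
kubectl apply -f - <<'EOF'
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: opea-models-pvc
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 50Gi
EOF
```

With the claim in place, the `model-volume` entry in `codegen.yaml` would reference `persistentVolumeClaim.claimName: opea-models-pvc` instead of the `/mnt/opea-models` hostPath.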
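To go with the readiness note added to the README, one way to check on the TGI pod before retrying the `curl` is sketched below. The `deploy/codegen-tgi` target is an assumption based on the `codegen-tgi` service name in `TGI_LLM_ENDPOINT`; adjust it if the Deployment is named differently.

```
# Confirm all pods reach Running/Ready; the model download can take a few minutes.
kubectl get pods

# Tail the TGI server logs to watch startup progress or surface errors; the
# Deployment name is assumed to match the codegen-tgi service.
kubectl logs deploy/codegen-tgi --tail=50 -f
```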