From f532ecdc3cd13d96a3697776746939145eade334 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Mon, 19 Aug 2024 20:42:27 -0700 Subject: [PATCH 1/6] update manifests for v0.9 --- ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml | 4 ++-- ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml | 4 ++-- ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml index 48b9d69f1..2a91c2e8b 100644 --- a/ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml @@ -35,9 +35,9 @@ spec: - --model-id - $(LLM_MODEL_ID) - --max-input-length - - '1024' - - --max-total-tokens - '2048' + - --max-total-tokens + - '4096' - --max-batch-total-tokens - '65536' - --max-batch-prefill-tokens diff --git a/ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml index 26c6f8f1c..d74c538c1 100644 --- a/ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml @@ -35,9 +35,9 @@ spec: - --model-id - $(LLM_MODEL_ID) - --max-input-length - - '1024' - - --max-total-tokens - '2048' + - --max-total-tokens + - '4096' - --max-batch-total-tokens - '65536' - --max-batch-prefill-tokens diff --git a/ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml index 4c5c3529b..d63b2f72a 100644 --- a/ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml @@ -35,9 +35,9 @@ spec: - --model-id - $(LLM_MODEL_ID) - --max-input-length - - '1024' - - --max-total-tokens - '2048' + - --max-total-tokens + - '4096' - --max-batch-total-tokens - '65536' - --max-batch-prefill-tokens From 74bf3e9973f5d968b2a4ee605079287b71b91d8a Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Mon, 19 Aug 
2024 23:04:22 -0700 Subject: [PATCH 2/6] update embedding manifests for v0.9 --- ChatQnA/benchmark/four_gaudi/embedding-dependency_run.yaml | 2 +- ChatQnA/benchmark/two_gaudi/embedding-dependency_run.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ChatQnA/benchmark/four_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/four_gaudi/embedding-dependency_run.yaml index 7dcb10342..00852e33f 100644 --- a/ChatQnA/benchmark/four_gaudi/embedding-dependency_run.yaml +++ b/ChatQnA/benchmark/four_gaudi/embedding-dependency_run.yaml @@ -7,7 +7,7 @@ metadata: name: embedding-dependency-deploy namespace: default spec: - replicas: 6 + replicas: 4 selector: matchLabels: app: embedding-dependency-deploy diff --git a/ChatQnA/benchmark/two_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/two_gaudi/embedding-dependency_run.yaml index 89d40715e..abee57594 100644 --- a/ChatQnA/benchmark/two_gaudi/embedding-dependency_run.yaml +++ b/ChatQnA/benchmark/two_gaudi/embedding-dependency_run.yaml @@ -7,7 +7,7 @@ metadata: name: embedding-dependency-deploy namespace: default spec: - replicas: 3 + replicas: 2 selector: matchLabels: app: embedding-dependency-deploy From 0a79349a5f2442d495d439d7b8dc4ff9d1f657a0 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Mon, 19 Aug 2024 23:11:23 -0700 Subject: [PATCH 3/6] update ChatQnA manifests for v0.9 --- ChatQnA/benchmark/single_gaudi/chatqna_config_map.yaml | 2 +- ChatQnA/benchmark/two_gaudi/chatqna_config_map.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ChatQnA/benchmark/single_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/single_gaudi/chatqna_config_map.yaml index 24b8e72df..368c800e4 100644 --- a/ChatQnA/benchmark/single_gaudi/chatqna_config_map.yaml +++ b/ChatQnA/benchmark/single_gaudi/chatqna_config_map.yaml @@ -15,7 +15,7 @@ data: TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 INDEX_NAME: 
rag-redis - HUGGINGFACEHUB_API_TOKEN: {HF_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} EMBEDDING_SERVICE_HOST_IP: embedding-svc RETRIEVER_SERVICE_HOST_IP: retriever-svc RERANK_SERVICE_HOST_IP: reranking-svc diff --git a/ChatQnA/benchmark/two_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/two_gaudi/chatqna_config_map.yaml index 24b8e72df..368c800e4 100644 --- a/ChatQnA/benchmark/two_gaudi/chatqna_config_map.yaml +++ b/ChatQnA/benchmark/two_gaudi/chatqna_config_map.yaml @@ -15,7 +15,7 @@ data: TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: {HF_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} EMBEDDING_SERVICE_HOST_IP: embedding-svc RETRIEVER_SERVICE_HOST_IP: retriever-svc RERANK_SERVICE_HOST_IP: reranking-svc From 6a23a823fa2ada31e0b5a2488904963baafbcbf1 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Mon, 19 Aug 2024 23:15:00 -0700 Subject: [PATCH 4/6] update model mount path for v0.9 --- ChatQnA/benchmark/four_gaudi/embedding-dependency_run.yaml | 2 +- ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml | 2 +- ChatQnA/benchmark/four_gaudi/reranking-dependency_run.yaml | 2 +- ChatQnA/benchmark/single_gaudi/embedding-dependency_run.yaml | 2 +- ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml | 2 +- ChatQnA/benchmark/single_gaudi/reranking-dependency_run.yaml | 2 +- ChatQnA/benchmark/two_gaudi/embedding-dependency_run.yaml | 2 +- ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml | 2 +- ChatQnA/benchmark/two_gaudi/reranking-dependency_run.yaml | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/ChatQnA/benchmark/four_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/four_gaudi/embedding-dependency_run.yaml index 00852e33f..69dbd7af9 100644 --- a/ChatQnA/benchmark/four_gaudi/embedding-dependency_run.yaml +++ b/ChatQnA/benchmark/four_gaudi/embedding-dependency_run.yaml @@ -48,7 +48,7 @@ spec: volumes: 
- name: model-volume hostPath: - path: /home/sdp/cesg + path: /mnt/models type: Directory - name: shm emptyDir: diff --git a/ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml index 2a91c2e8b..a1bb1c2ac 100644 --- a/ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml @@ -67,7 +67,7 @@ spec: volumes: - name: model-volume hostPath: - path: /home/sdp/cesg + path: /mnt/models type: Directory - name: shm emptyDir: diff --git a/ChatQnA/benchmark/four_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/four_gaudi/reranking-dependency_run.yaml index 3f595ae1e..a8001a25b 100644 --- a/ChatQnA/benchmark/four_gaudi/reranking-dependency_run.yaml +++ b/ChatQnA/benchmark/four_gaudi/reranking-dependency_run.yaml @@ -64,7 +64,7 @@ spec: volumes: - name: model-volume hostPath: - path: /home/sdp/cesg + path: /mnt/models type: Directory - name: shm emptyDir: diff --git a/ChatQnA/benchmark/single_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/single_gaudi/embedding-dependency_run.yaml index d692876aa..f27ffcad0 100644 --- a/ChatQnA/benchmark/single_gaudi/embedding-dependency_run.yaml +++ b/ChatQnA/benchmark/single_gaudi/embedding-dependency_run.yaml @@ -48,7 +48,7 @@ spec: volumes: - name: model-volume hostPath: - path: /home/sdp/cesg + path: /mnt/models type: Directory - name: shm emptyDir: diff --git a/ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml index d74c538c1..0a1fe34a7 100644 --- a/ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml @@ -67,7 +67,7 @@ spec: volumes: - name: model-volume hostPath: - path: /home/sdp/cesg + path: /mnt/models type: Directory - name: shm emptyDir: diff --git a/ChatQnA/benchmark/single_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/single_gaudi/reranking-dependency_run.yaml index 
3f595ae1e..a8001a25b 100644 --- a/ChatQnA/benchmark/single_gaudi/reranking-dependency_run.yaml +++ b/ChatQnA/benchmark/single_gaudi/reranking-dependency_run.yaml @@ -64,7 +64,7 @@ spec: volumes: - name: model-volume hostPath: - path: /home/sdp/cesg + path: /mnt/models type: Directory - name: shm emptyDir: diff --git a/ChatQnA/benchmark/two_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/two_gaudi/embedding-dependency_run.yaml index abee57594..485d73402 100644 --- a/ChatQnA/benchmark/two_gaudi/embedding-dependency_run.yaml +++ b/ChatQnA/benchmark/two_gaudi/embedding-dependency_run.yaml @@ -48,7 +48,7 @@ spec: volumes: - name: model-volume hostPath: - path: /home/sdp/cesg + path: /mnt/models type: Directory - name: shm emptyDir: diff --git a/ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml index d63b2f72a..fb16a0dbc 100644 --- a/ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml @@ -67,7 +67,7 @@ spec: volumes: - name: model-volume hostPath: - path: /home/sdp/cesg + path: /mnt/models type: Directory - name: shm emptyDir: diff --git a/ChatQnA/benchmark/two_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/two_gaudi/reranking-dependency_run.yaml index 3f595ae1e..a8001a25b 100644 --- a/ChatQnA/benchmark/two_gaudi/reranking-dependency_run.yaml +++ b/ChatQnA/benchmark/two_gaudi/reranking-dependency_run.yaml @@ -64,7 +64,7 @@ spec: volumes: - name: model-volume hostPath: - path: /home/sdp/cesg + path: /mnt/models type: Directory - name: shm emptyDir: From 4f25f47155964d01723c81ad1a9202cef7466821 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Mon, 19 Aug 2024 23:29:52 -0700 Subject: [PATCH 5/6] update model HF TOKEN variables & reranking name for v0.9 --- ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml | 2 +- ChatQnA/benchmark/four_gaudi/reranking-dependency_run.yaml | 4 ++-- ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml | 2 +- 
ChatQnA/benchmark/single_gaudi/reranking-dependency_run.yaml | 4 ++-- ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml | 2 +- ChatQnA/benchmark/two_gaudi/reranking-dependency_run.yaml | 4 ++-- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml index a1bb1c2ac..130089f87 100644 --- a/ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml @@ -62,7 +62,7 @@ spec: - name: HABANA_VISIBLE_DEVICES value: all - name: HF_TOKEN - value: $(HF_TOKEN) + value: ${HF_TOKEN} serviceAccountName: default volumes: - name: model-volume diff --git a/ChatQnA/benchmark/four_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/four_gaudi/reranking-dependency_run.yaml index a8001a25b..d8c0621ba 100644 --- a/ChatQnA/benchmark/four_gaudi/reranking-dependency_run.yaml +++ b/ChatQnA/benchmark/four_gaudi/reranking-dependency_run.yaml @@ -31,7 +31,7 @@ spec: - envFrom: - configMapRef: name: qna-config - image: tei_gaudi:rerank + image: opea/tei-gaudi:lates name: reranking-dependency-deploy args: - --model-id @@ -57,7 +57,7 @@ spec: - name: HABANA_VISIBLE_DEVICES value: all - name: HF_TOKEN - value: $(HF_TOKEN) + value: ${HF_TOKEN} - name: MAX_WARMUP_SEQUENCE_LENGTH value: '512' serviceAccountName: default diff --git a/ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml index 0a1fe34a7..093d2264b 100644 --- a/ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml @@ -62,7 +62,7 @@ spec: - name: HABANA_VISIBLE_DEVICES value: all - name: HF_TOKEN - value: $(HF_TOKEN) + value: ${HF_TOKEN} serviceAccountName: default volumes: - name: model-volume diff --git a/ChatQnA/benchmark/single_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/single_gaudi/reranking-dependency_run.yaml index 
a8001a25b..d8c0621ba 100644 --- a/ChatQnA/benchmark/single_gaudi/reranking-dependency_run.yaml +++ b/ChatQnA/benchmark/single_gaudi/reranking-dependency_run.yaml @@ -31,7 +31,7 @@ spec: - envFrom: - configMapRef: name: qna-config - image: tei_gaudi:rerank + image: opea/tei-gaudi:lates name: reranking-dependency-deploy args: - --model-id @@ -57,7 +57,7 @@ spec: - name: HABANA_VISIBLE_DEVICES value: all - name: HF_TOKEN - value: $(HF_TOKEN) + value: ${HF_TOKEN} - name: MAX_WARMUP_SEQUENCE_LENGTH value: '512' serviceAccountName: default diff --git a/ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml index fb16a0dbc..9499f04ed 100644 --- a/ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml @@ -62,7 +62,7 @@ spec: - name: HABANA_VISIBLE_DEVICES value: all - name: HF_TOKEN - value: $(HF_TOKEN) + value: ${HF_TOKEN} serviceAccountName: default volumes: - name: model-volume diff --git a/ChatQnA/benchmark/two_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/two_gaudi/reranking-dependency_run.yaml index a8001a25b..d8c0621ba 100644 --- a/ChatQnA/benchmark/two_gaudi/reranking-dependency_run.yaml +++ b/ChatQnA/benchmark/two_gaudi/reranking-dependency_run.yaml @@ -31,7 +31,7 @@ spec: - envFrom: - configMapRef: name: qna-config - image: tei_gaudi:rerank + image: opea/tei-gaudi:lates name: reranking-dependency-deploy args: - --model-id @@ -57,7 +57,7 @@ spec: - name: HABANA_VISIBLE_DEVICES value: all - name: HF_TOKEN - value: $(HF_TOKEN) + value: ${HF_TOKEN} - name: MAX_WARMUP_SEQUENCE_LENGTH value: '512' serviceAccountName: default From b8f3014d0360a336ed5434b841b8fda146073733 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Mon, 19 Aug 2024 23:31:01 -0700 Subject: [PATCH 6/6] update model HF TOKEN variables & reranking name for v0.9 --- ChatQnA/benchmark/four_gaudi/reranking-dependency_run.yaml | 2 +- ChatQnA/benchmark/single_gaudi/reranking-dependency_run.yaml 
| 2 +- ChatQnA/benchmark/two_gaudi/reranking-dependency_run.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ChatQnA/benchmark/four_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/four_gaudi/reranking-dependency_run.yaml index d8c0621ba..af908ecd1 100644 --- a/ChatQnA/benchmark/four_gaudi/reranking-dependency_run.yaml +++ b/ChatQnA/benchmark/four_gaudi/reranking-dependency_run.yaml @@ -31,7 +31,7 @@ spec: - envFrom: - configMapRef: name: qna-config - image: opea/tei-gaudi:lates + image: opea/tei-gaudi:latest name: reranking-dependency-deploy args: - --model-id diff --git a/ChatQnA/benchmark/single_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/single_gaudi/reranking-dependency_run.yaml index d8c0621ba..af908ecd1 100644 --- a/ChatQnA/benchmark/single_gaudi/reranking-dependency_run.yaml +++ b/ChatQnA/benchmark/single_gaudi/reranking-dependency_run.yaml @@ -31,7 +31,7 @@ spec: - envFrom: - configMapRef: name: qna-config - image: opea/tei-gaudi:lates + image: opea/tei-gaudi:latest name: reranking-dependency-deploy args: - --model-id diff --git a/ChatQnA/benchmark/two_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/two_gaudi/reranking-dependency_run.yaml index d8c0621ba..af908ecd1 100644 --- a/ChatQnA/benchmark/two_gaudi/reranking-dependency_run.yaml +++ b/ChatQnA/benchmark/two_gaudi/reranking-dependency_run.yaml @@ -31,7 +31,7 @@ spec: - envFrom: - configMapRef: name: qna-config - image: opea/tei-gaudi:lates + image: opea/tei-gaudi:latest name: reranking-dependency-deploy args: - --model-id