diff --git a/ChatQnA/benchmark/four_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/four_gaudi/embedding-dependency_run.yaml index 7dcb10342..69dbd7af9 100644 --- a/ChatQnA/benchmark/four_gaudi/embedding-dependency_run.yaml +++ b/ChatQnA/benchmark/four_gaudi/embedding-dependency_run.yaml @@ -7,7 +7,7 @@ metadata: name: embedding-dependency-deploy namespace: default spec: - replicas: 6 + replicas: 4 selector: matchLabels: app: embedding-dependency-deploy @@ -48,7 +48,7 @@ spec: volumes: - name: model-volume hostPath: - path: /home/sdp/cesg + path: /mnt/models type: Directory - name: shm emptyDir: diff --git a/ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml index 48b9d69f1..130089f87 100644 --- a/ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml @@ -35,9 +35,9 @@ spec: - --model-id - $(LLM_MODEL_ID) - --max-input-length - - '1024' - - --max-total-tokens - '2048' + - --max-total-tokens + - '4096' - --max-batch-total-tokens - '65536' - --max-batch-prefill-tokens @@ -62,12 +62,12 @@ spec: - name: HABANA_VISIBLE_DEVICES value: all - name: HF_TOKEN - value: $(HF_TOKEN) + value: ${HF_TOKEN} serviceAccountName: default volumes: - name: model-volume hostPath: - path: /home/sdp/cesg + path: /mnt/models type: Directory - name: shm emptyDir: diff --git a/ChatQnA/benchmark/four_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/four_gaudi/reranking-dependency_run.yaml index 3f595ae1e..af908ecd1 100644 --- a/ChatQnA/benchmark/four_gaudi/reranking-dependency_run.yaml +++ b/ChatQnA/benchmark/four_gaudi/reranking-dependency_run.yaml @@ -31,7 +31,7 @@ spec: - envFrom: - configMapRef: name: qna-config - image: tei_gaudi:rerank + image: opea/tei-gaudi:latest name: reranking-dependency-deploy args: - --model-id @@ -57,14 +57,14 @@ spec: - name: HABANA_VISIBLE_DEVICES value: all - name: HF_TOKEN - value: $(HF_TOKEN) + value: ${HF_TOKEN} - name: MAX_WARMUP_SEQUENCE_LENGTH value: '512' serviceAccountName: default volumes: - name: model-volume hostPath: - path: /home/sdp/cesg + path: /mnt/models type: Directory - name: shm emptyDir: diff --git a/ChatQnA/benchmark/single_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/single_gaudi/chatqna_config_map.yaml index 24b8e72df..368c800e4 100644 --- a/ChatQnA/benchmark/single_gaudi/chatqna_config_map.yaml +++ b/ChatQnA/benchmark/single_gaudi/chatqna_config_map.yaml @@ -15,7 +15,7 @@ data: TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: {HF_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} EMBEDDING_SERVICE_HOST_IP: embedding-svc RETRIEVER_SERVICE_HOST_IP: retriever-svc RERANK_SERVICE_HOST_IP: reranking-svc diff --git a/ChatQnA/benchmark/single_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/single_gaudi/embedding-dependency_run.yaml index d692876aa..f27ffcad0 100644 --- a/ChatQnA/benchmark/single_gaudi/embedding-dependency_run.yaml +++ b/ChatQnA/benchmark/single_gaudi/embedding-dependency_run.yaml @@ -48,7 +48,7 @@ spec: volumes: - name: model-volume hostPath: - path: /home/sdp/cesg + path: /mnt/models type: Directory - name: shm emptyDir: diff --git a/ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml index 26c6f8f1c..093d2264b 100644 --- a/ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml @@ -35,9 +35,9 @@ spec: - --model-id - $(LLM_MODEL_ID) - --max-input-length - - '1024' - - --max-total-tokens - '2048' + - --max-total-tokens + - '4096' - --max-batch-total-tokens - '65536' - --max-batch-prefill-tokens @@ -62,12 +62,12 @@ spec: - name: HABANA_VISIBLE_DEVICES value: all - name: HF_TOKEN - value: $(HF_TOKEN) + value: ${HF_TOKEN} serviceAccountName: default volumes: - name: model-volume hostPath: - path: /home/sdp/cesg + path: /mnt/models type: Directory - name: shm emptyDir: diff --git a/ChatQnA/benchmark/single_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/single_gaudi/reranking-dependency_run.yaml index 3f595ae1e..af908ecd1 100644 --- a/ChatQnA/benchmark/single_gaudi/reranking-dependency_run.yaml +++ b/ChatQnA/benchmark/single_gaudi/reranking-dependency_run.yaml @@ -31,7 +31,7 @@ spec: - envFrom: - configMapRef: name: qna-config - image: tei_gaudi:rerank + image: opea/tei-gaudi:latest name: reranking-dependency-deploy args: - --model-id @@ -57,14 +57,14 @@ spec: - name: HABANA_VISIBLE_DEVICES value: all - name: HF_TOKEN - value: $(HF_TOKEN) + value: ${HF_TOKEN} - name: MAX_WARMUP_SEQUENCE_LENGTH value: '512' serviceAccountName: default volumes: - name: model-volume hostPath: - path: /home/sdp/cesg + path: /mnt/models type: Directory - name: shm emptyDir: diff --git a/ChatQnA/benchmark/two_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/two_gaudi/chatqna_config_map.yaml index 24b8e72df..368c800e4 100644 --- a/ChatQnA/benchmark/two_gaudi/chatqna_config_map.yaml +++ b/ChatQnA/benchmark/two_gaudi/chatqna_config_map.yaml @@ -15,7 +15,7 @@ data: TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: {HF_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} EMBEDDING_SERVICE_HOST_IP: embedding-svc RETRIEVER_SERVICE_HOST_IP: retriever-svc RERANK_SERVICE_HOST_IP: reranking-svc diff --git a/ChatQnA/benchmark/two_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/two_gaudi/embedding-dependency_run.yaml index 89d40715e..485d73402 100644 --- a/ChatQnA/benchmark/two_gaudi/embedding-dependency_run.yaml +++ b/ChatQnA/benchmark/two_gaudi/embedding-dependency_run.yaml @@ -7,7 +7,7 @@ metadata: name: embedding-dependency-deploy namespace: default spec: - replicas: 3 + replicas: 2 selector: matchLabels: app: embedding-dependency-deploy @@ -48,7 +48,7 @@ spec: volumes: - name: model-volume hostPath: - path: /home/sdp/cesg + path: /mnt/models type: Directory - name: shm emptyDir: diff --git a/ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml index 4c5c3529b..9499f04ed 100644 --- a/ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml @@ -35,9 +35,9 @@ spec: - --model-id - $(LLM_MODEL_ID) - --max-input-length - - '1024' - - --max-total-tokens - '2048' + - --max-total-tokens + - '4096' - --max-batch-total-tokens - '65536' - --max-batch-prefill-tokens @@ -62,12 +62,12 @@ spec: - name: HABANA_VISIBLE_DEVICES value: all - name: HF_TOKEN - value: $(HF_TOKEN) + value: ${HF_TOKEN} serviceAccountName: default volumes: - name: model-volume hostPath: - path: /home/sdp/cesg + path: /mnt/models type: Directory - name: shm emptyDir: diff --git a/ChatQnA/benchmark/two_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/two_gaudi/reranking-dependency_run.yaml index 3f595ae1e..af908ecd1 100644 --- a/ChatQnA/benchmark/two_gaudi/reranking-dependency_run.yaml +++ b/ChatQnA/benchmark/two_gaudi/reranking-dependency_run.yaml @@ -31,7 +31,7 @@ spec: - envFrom: - configMapRef: name: qna-config - image: tei_gaudi:rerank + image: opea/tei-gaudi:latest name: reranking-dependency-deploy args: - --model-id @@ -57,14 +57,14 @@ spec: - name: HABANA_VISIBLE_DEVICES value: all - name: HF_TOKEN - value: $(HF_TOKEN) + value: ${HF_TOKEN} - name: MAX_WARMUP_SEQUENCE_LENGTH value: '512' serviceAccountName: default volumes: - name: model-volume hostPath: - path: /home/sdp/cesg + path: /mnt/models type: Directory - name: shm emptyDir: