added more configurations

opea-project · Sep 25, 2024 · c9c36f6 · c9c36f6
1 parent fbaf87a
commit c9c36f6
Show file tree

Hide file tree

Showing 5 changed files with 998 additions and 4 deletions.
diff --git a/ChatQnA/kubernetes/intel/chatqna_charts/README.md b/ChatQnA/kubernetes/intel/chatqna_charts/README.md
@@ -1,5 +1,5 @@
 ## ChatQnA Deployment
-This document demonstrates how to use Helm charts to create ChatQnA pipelines. 
+This document guides you through deploying ChatQnA pipelines using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources.
 
 ## Getting Started
 
@@ -8,11 +8,11 @@ This document demonstrates how to use Helm charts to create ChatQnA pipelines.
 # on k8s-master node
 cd GenAIExamples/ChatQnA/kubernetes/intel/chatqna_charts
 
-# set the huggingface token
+# Replace <your token> with your actual Hugging Face token, then run the following commands:
 HUGGINGFACE_TOKEN=<your token>
 find . -name '*.yaml' -type f -exec sed -i "s#\${HF_TOKEN}#${HUGGINGFACE_TOKEN}#g" {} \;
 
-# set models
+# Set the following variables to the desired model IDs:
 LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
 EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5
 RERANK_MODEL_ID=BAAI/bge-reranker-base
@@ -24,10 +24,12 @@ find . -name '*.yaml' -type f -exec sed -i "s#\$(RERANK_MODEL_ID)#${RERANK_MODEL
 
 ### ChatQnA Installation
 ```bash
-
 # Deploy a ChatQnA pipeline using the specified YAML configuration.
 # To deploy with different configurations, simply provide a different YAML file.
 helm install chatqna chatqna_charts/ -f chatqna_charts/oob_single_node.yaml
+
+# Tip: to preview the manifests rendered from the given YAML values file without installing anything, run:
+helm template chatqna chatqna_charts/ -f chatqna_charts/oob_single_node.yaml
 ```
 
 Notes: The provided [BKC manifests](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark) for single, two, and four node Kubernetes clusters are generated using this tool.
diff --git a/ChatQnA/kubernetes/intel/chatqna_charts/oob_four_nodes.yaml b/ChatQnA/kubernetes/intel/chatqna_charts/oob_four_nodes.yaml
@@ -0,0 +1,237 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Chart-wide settings. How these values reach the pods is decided by the chart
+# templates, which are not part of this file.
+# The endpoint URLs below hard-code the `default` namespace, and their host names
+# and ports correspond to entries under `services:` in this file.
+config:
+ EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
+ EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
+ # ${HF_TOKEN} is a placeholder; substitute a real Hugging Face token before installing.
+ HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
+ INDEX_NAME: rag-redis
+ LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+ LLM_SERVER_HOST_IP: llm-dependency-svc
+ NODE_SELECTOR: chatqna-opea
+ # Port 6379 is the vector-db-service port of the vector-db service.
+ REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
+ RERANK_MODEL_ID: BAAI/bge-reranker-base
+ RERANK_SERVER_HOST_IP: reranking-dependency-svc
+ RETRIEVER_SERVICE_HOST_IP: retriever-svc
+ # TEI_EMBEDDING_ENDPOINT and TEI_ENDPOINT carry the same URL (embedding-dependency-svc, port 6006).
+ TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
+ TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
+ # Port 8808 is the service port of reranking-dependency-svc.
+ TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
+ # Port 9009 is the service port of llm-dependency-svc.
+ TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
+
+# Workloads to create. Each entry has a name and a spec giving the image,
+# replica count and container settings.
+deployments:
+ # ChatQnA backend; selected by chatqna-backend-server-svc, which publishes
+ # container port 8888 on NodePort 30888.
+ - name: chatqna-backend-server-deploy
+ spec:
+ image_name: opea/chatqna-no-wrapper
+ # NOTE(review): `latest` is a floating tag; consider pinning a version for reproducible deployments.
+ image_tag: latest
+ replicas: 4
+ ports:
+ - containerPort: 8888
+
+ # Data-preparation workload; selected by dataprep-svc, which forwards port 6007.
+ - name: dataprep-deploy
+ spec:
+ image_name: opea/dataprep-redis
+ image_tag: latest
+ replicas: 1
+ ports:
+ - containerPort: 6007
+
+ # Redis Stack instance; the deployment and its service share the name `vector-db`.
+ - name: vector-db
+ spec:
+ image_name: redis/redis-stack
+ image_tag: 7.2.0-v9
+ replicas: 1
+ ports:
+ # Both ports are published by the vector-db service:
+ # 6379 as vector-db-service (the port used in REDIS_URL) and 8001 as vector-db-insight.
+ - containerPort: 6379
+ - containerPort: 8001
+
+ # Retriever workload; selected by retriever-svc, which forwards port 7000.
+ - name: retriever-deploy
+ spec:
+ image_name: opea/retriever-redis
+ image_tag: latest
+ replicas: 4
+ ports:
+ - containerPort: 7000
+
+ # Text-embeddings-inference server (CPU image) for the embedding model;
+ # selected by embedding-dependency-svc, which maps service port 6006 to container port 80.
+ - name: embedding-dependency-deploy
+ spec:
+ image_name: ghcr.io/huggingface/text-embeddings-inference
+ image_tag: cpu-1.5
+ replicas: 4
+ ports:
+ - containerPort: 80
+ # Each args entry is one mapping: `name` is the flag and the optional `value` is its argument.
+ args:
+ - name: "--model-id"
+ value: $(EMBEDDING_MODEL_ID)
+ # Flag without an argument, so no `value` key.
+ - name: "--auto-truncate"
+ volumeMounts:
+ - mountPath: /data
+ name: model-volume
+ - mountPath: /dev/shm
+ name: shm
+ volumes:
+ # Node directory /mnt/models, mounted at /data in the container.
+ - hostPath:
+ path: /mnt/models
+ type: Directory
+ name: model-volume
+ # Memory-backed scratch volume, capped at 1Gi and mounted at /dev/shm.
+ - emptyDir:
+ medium: Memory
+ sizeLimit: 1Gi
+ name: shm
+
+ # Gaudi-based inference server for the reranking model; selected by
+ # reranking-dependency-svc, which maps service port 8808 to target port 80.
+ # NOTE(review): unlike the embedding and LLM deployments, no `ports:` entry is
+ # declared here although the service targets port 80 — confirm whether the chart
+ # template needs one.
+ - name: reranking-dependency-deploy
+ spec:
+ image_name: opea/tei-gaudi
+ image_tag: latest
+ replicas: 1
+ # One Gaudi device per replica.
+ resources:
+ limits:
+ habana.ai/gaudi: 1
+ # Each args entry is one mapping: `name` is the flag and the optional `value` is
+ # its argument. `value` must sit in the same entry as its `name`; it must not be
+ # a separate list item.
+ args:
+ - name: "--model-id"
+ value: $(RERANK_MODEL_ID)
+ # Flag without an argument, so no `value` key.
+ - name: "--auto-truncate"
+ # Environment values are strings; boolean- and number-looking ones are quoted
+ # so YAML does not retype them.
+ env:
+ - name: OMPI_MCA_btl_vader_single_copy_mechanism
+ value: none
+ - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+ value: "true"
+ - name: runtime
+ value: habana
+ - name: HABANA_VISIBLE_DEVICES
+ value: all
+ # ${HF_TOKEN} is a placeholder; substitute a real Hugging Face token before installing.
+ - name: HF_TOKEN
+ value: ${HF_TOKEN}
+ - name: MAX_WARMUP_SEQUENCE_LENGTH
+ value: "512"
+ volumeMounts:
+ - mountPath: /data
+ name: model-volume
+ - mountPath: /dev/shm
+ name: shm
+ volumes:
+ # Node directory /mnt/models, mounted at /data in the container.
+ - hostPath:
+ path: /mnt/models
+ type: Directory
+ name: model-volume
+ # Memory-backed scratch volume, capped at 1Gi and mounted at /dev/shm.
+ - emptyDir:
+ medium: Memory
+ sizeLimit: 1Gi
+ name: shm
+
+ # Gaudi-based text-generation server for the LLM; selected by llm-dependency-svc,
+ # which maps service port 9009 to container port 80.
+ - name: llm-dependency-deploy
+ spec:
+ image_name: ghcr.io/huggingface/tgi-gaudi
+ image_tag: 2.0.4
+ # 31 replicas at one Gaudi device each; together with the single reranking replica
+ # this file requests 32 devices in total.
+ # NOTE(review): presumably 8 devices on each of four nodes — confirm against the cluster.
+ replicas: 31
+ ports:
+ - containerPort: 80
+ resources:
+ limits:
+ habana.ai/gaudi: 1
+ # Each args entry is one mapping: `name` is the flag and `value` is its argument.
+ # Numeric arguments are quoted so they stay strings.
+ args:
+ - name: "--model-id"
+ value: $(LLM_MODEL_ID)
+ - name: "--max-input-length"
+ value: "2048"
+ - name: "--max-total-tokens"
+ value: "4096"
+ env:
+ - name: OMPI_MCA_btl_vader_single_copy_mechanism
+ value: none
+ - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+ value: "true"
+ - name: runtime
+ value: habana
+ - name: HABANA_VISIBLE_DEVICES
+ value: all
+ # ${HF_TOKEN} is a placeholder; substitute a real Hugging Face token before installing.
+ - name: HF_TOKEN
+ value: ${HF_TOKEN}
+ volumeMounts:
+ - mountPath: /data
+ name: model-volume
+ - mountPath: /dev/shm
+ name: shm
+ volumes:
+ # Node directory /mnt/models, mounted at /data in the container.
+ - hostPath:
+ path: /mnt/models
+ type: Directory
+ name: model-volume
+ # Memory-backed scratch volume, capped at 1Gi and mounted at /dev/shm.
+ - emptyDir:
+ medium: Memory
+ sizeLimit: 1Gi
+ name: shm
+
+# Services to create. Each selector's `app` value is the name of an entry under
+# `deployments:` in this file.
+services:
+ # The only NodePort service in this file: publishes the backend's port 8888 on node port 30888.
+ - name: chatqna-backend-server-svc
+ spec:
+ ports:
+ - name: service
+ nodePort: 30888
+ port: 8888
+ targetPort: 8888
+ selector:
+ app: chatqna-backend-server-deploy
+ type: NodePort
+
+ # Cluster-internal access to dataprep-deploy on port 6007.
+ - name: dataprep-svc
+ spec:
+ ports:
+ - name: port1
+ port: 6007
+ targetPort: 6007
+ selector:
+ app: dataprep-deploy
+ type: ClusterIP
+
+ # Service port 6006 (used by TEI_EMBEDDING_ENDPOINT and TEI_ENDPOINT) maps to container port 80.
+ - name: embedding-dependency-svc
+ spec:
+ ports:
+ - name: service
+ port: 6006
+ targetPort: 80
+ selector:
+ app: embedding-dependency-deploy
+ type: ClusterIP
+
+ # Service port 9009 (used by TGI_LLM_ENDPOINT) maps to container port 80.
+ - name: llm-dependency-svc
+ spec:
+ ports:
+ - name: service
+ port: 9009
+ targetPort: 80
+ selector:
+ app: llm-dependency-deploy
+ type: ClusterIP
+
+ # Service port 8808 (used by TEI_RERANKING_ENDPOINT) maps to target port 80.
+ - name: reranking-dependency-svc
+ spec:
+ ports:
+ - name: service
+ port: 8808
+ targetPort: 80
+ selector:
+ app: reranking-dependency-deploy
+ type: ClusterIP
+
+ # Cluster-internal access to retriever-deploy on port 7000.
+ - name: retriever-svc
+ spec:
+ ports:
+ - name: service
+ port: 7000
+ targetPort: 7000
+ selector:
+ app: retriever-deploy
+ type: ClusterIP
+
+ # Shares its name with the vector-db deployment; publishes 6379 (the port used in
+ # REDIS_URL) and 8001.
+ - name: vector-db
+ spec:
+ ports:
+ - name: vector-db-service
+ port: 6379
+ targetPort: 6379
+ - name: vector-db-insight
+ port: 8001
+ targetPort: 8001
+ selector:
+ app: vector-db
+ type: ClusterIP