Skip to content

Commit

Permalink
added more configurations
Browse files Browse the repository at this point in the history
  • Loading branch information
Zhenzhong1 committed Sep 25, 2024
1 parent fbaf87a commit c9c36f6
Show file tree
Hide file tree
Showing 5 changed files with 998 additions and 4 deletions.
10 changes: 6 additions & 4 deletions ChatQnA/kubernetes/intel/chatqna_charts/README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
## ChatQnA Deployment
This document demonstrates how to use Helm charts to create ChatQnA pipelines.
This document guides you through deploying ChatQnA pipelines using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources.

## Getting Started

Expand All @@ -8,11 +8,11 @@ This document demonstrates how to use Helm charts to create ChatQnA pipelines.
# on k8s-master node
cd GenAIExamples/ChatQnA/kubernetes/intel/chatqna_charts

# set the huggingface token
# Replace <your token> with your actual Hugging Face token and run the following command:
HUGGINGFACE_TOKEN=<your token>
find . -name '*.yaml' -type f -exec sed -i "s#\${HF_TOKEN}#${HUGGINGFACE_TOKEN}#g" {} \;

# set models
# Replace the following placeholders with the desired model IDs:
LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5
RERANK_MODEL_ID=BAAI/bge-reranker-base
Expand All @@ -24,10 +24,12 @@ find . -name '*.yaml' -type f -exec sed -i "s#\$(RERANK_MODEL_ID)#${RERANK_MODEL

### ChatQnA Installation
```bash

# Deploy a ChatQnA pipeline using the specified YAML configuration.
# To deploy with different configurations, simply provide a different YAML file.
helm install chatqna chatqna_charts/ -f chatqna_charts/oob_single_node.yaml

# Tips: To display rendered manifests according to the given yaml.
helm template chatqna chatqna_charts/ -f chatqna_charts/oob_single_node.yaml
```

Notes: The provided [BKC manifests](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark) for single, two, and four node Kubernetes clusters are generated using this tool.
237 changes: 237 additions & 0 deletions ChatQnA/kubernetes/intel/chatqna_charts/oob_four_nodes.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,237 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

config:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVER_HOST_IP: llm-dependency-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVER_HOST_IP: reranking-dependency-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009

deployments:
- name: chatqna-backend-server-deploy
spec:
image_name: opea/chatqna-no-wrapper
image_tag: latest
replicas: 4
ports:
- containerPort: 8888

- name: dataprep-deploy
spec:
image_name: opea/dataprep-redis
image_tag: latest
replicas: 1
ports:
- containerPort: 6007

- name: vector-db
spec:
image_name: redis/redis-stack
image_tag: 7.2.0-v9
replicas: 1
ports:
- containerPort: 6379
- containerPort: 8001

- name: retriever-deploy
spec:
image_name: opea/retriever-redis
image_tag: latest
replicas: 4
ports:
- containerPort: 7000

- name: embedding-dependency-deploy
spec:
image_name: ghcr.io/huggingface/text-embeddings-inference
image_tag: cpu-1.5
replicas: 4
ports:
- containerPort: 80
args:
- name: "--model-id"
value: $(EMBEDDING_MODEL_ID)
- name: "--auto-truncate"
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm

- name: reranking-dependency-deploy
spec:
image_name: opea/tei-gaudi
image_tag: latest
replicas: 1
resources:
limits:
habana.ai/gaudi: 1
args:
- name: "--model-id"
- value: $(RERANK_MODEL_ID)
- name: "--auto-truncate"
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: "true"
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: "512"
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm

- name: llm-dependency-deploy
spec:
image_name: ghcr.io/huggingface/tgi-gaudi
image_tag: 2.0.4
replicas: 31
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
args:
- name: "--model-id"
value: $(LLM_MODEL_ID)
- name: "--max-input-length"
value: "2048"
- name: "--max-total-tokens"
value: "4096"
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: "true"
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm

services:
- name: chatqna-backend-server-svc
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort

- name: dataprep-svc
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP

- name: embedding-dependency-svc
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP

- name: llm-dependency-svc
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP

- name: reranking-dependency-svc
spec:
ports:
- name: service
port: 8808
targetPort: 80
selector:
app: reranking-dependency-deploy
type: ClusterIP

- name: retriever-svc
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP

- name: vector-db
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
Loading

0 comments on commit c9c36f6

Please sign in to comment.