From 18115513936f40832e79756e61c3f4201227f1cc Mon Sep 17 00:00:00 2001
From: Dolpher Du
Date: Wed, 16 Oct 2024 15:17:40 +0000
Subject: [PATCH] Add manifests for new components

Add manifests for vllm/chathistory/prompt/mongodb
Skip ci-*values.yaml in update script.

Signed-off-by: Dolpher Du
---
Reviewer notes (ignored by git am, not part of the commit message):
- The manifests below appear to be rendered output (see the "# Source:"
  headers and update_manifests.sh); mirror any hand edit in the chart
  templates or it will be lost on the next regeneration.
- chathistory-usvc / prompt-usvc: HTTP probe paths now start with "/",
  matching the "/health" style used by the vllm manifests.
- vllm_gaudi.yaml: the flags that followed the `bash -c` script string were
  dropped. Bash binds them to $0, $1, ... and the script never reads them;
  every one of them is already present inside the script string.
- The "index" blob ids are those of the original rendering and no longer
  match the edited file contents.

 helm-charts/update_manifests.sh               |   3 +
 .../config/manifests/chathistory-usvc.yaml    | 130 +++++++++++++++
 .../config/manifests/mongodb.yaml             |  90 +++++++++++
 .../config/manifests/prompt-usvc.yaml         | 130 +++++++++++++++
 .../config/manifests/vllm.yaml                | 147 +++++++++++++++++
 .../config/manifests/vllm_gaudi.yaml          | 141 +++++++++++++++++
 6 files changed, 641 insertions(+)
 create mode 100644 microservices-connector/config/manifests/chathistory-usvc.yaml
 create mode 100644 microservices-connector/config/manifests/mongodb.yaml
 create mode 100644 microservices-connector/config/manifests/prompt-usvc.yaml
 create mode 100644 microservices-connector/config/manifests/vllm.yaml
 create mode 100644 microservices-connector/config/manifests/vllm_gaudi.yaml

diff --git a/helm-charts/update_manifests.sh b/helm-charts/update_manifests.sh
index 8438c2fd..d57e09a6 100755
--- a/helm-charts/update_manifests.sh
+++ b/helm-charts/update_manifests.sh
@@ -25,6 +25,9 @@ function generate_yaml {
   for f in `ls ./common/$chart/*-values.yaml 2>/dev/null `; do
     filename=$(basename $f)
     releasename=$chart
+    if [[ $filename == ci-* ]]; then
+      continue
+    fi
     if [[ "$filename" =~ ^variant_.*-values.yaml ]]; then
       ext=$(echo $filename | sed 's/^variant_//' | sed 's/-values.yaml$//')
       outputfile="$ext-${chart}.yaml"
diff --git a/microservices-connector/config/manifests/chathistory-usvc.yaml b/microservices-connector/config/manifests/chathistory-usvc.yaml
new file mode 100644
index 00000000..88988467
--- /dev/null
+++ b/microservices-connector/config/manifests/chathistory-usvc.yaml
@@ -0,0 +1,130 @@
+---
+# Source: chathistory-usvc/templates/configmap.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: chathistory-usvc-config
+  labels:
+    helm.sh/chart: chathistory-usvc-1.0.0
+    app.kubernetes.io/name: chathistory-usvc
+    app.kubernetes.io/instance: chathistory-usvc
+    app.kubernetes.io/version: "v1.0"
+    app.kubernetes.io/managed-by: Helm
+data:
+  MONGO_HOST: "chathistory-usvc-mongodb"
+  MONGO_PORT: "27017"
+  DB_NAME: "OPEA"
+  COLLECTION_NAME: "ChatHistory"
+  http_proxy: ""
+  https_proxy: ""
+  no_proxy: ""
+  LOGFLAG: ""
+---
+# Source: chathistory-usvc/templates/service.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: chathistory-usvc
+  labels:
+    helm.sh/chart: chathistory-usvc-1.0.0
+    app.kubernetes.io/name: chathistory-usvc
+    app.kubernetes.io/instance: chathistory-usvc
+    app.kubernetes.io/version: "v1.0"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  type: ClusterIP
+  ports:
+    - port: 6012
+      targetPort: 6012
+      protocol: TCP
+      name: port
+  selector:
+    app.kubernetes.io/name: chathistory-usvc
+    app.kubernetes.io/instance: chathistory-usvc
+---
+# Source: chathistory-usvc/templates/deployment.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: chathistory-usvc
+  labels:
+    helm.sh/chart: chathistory-usvc-1.0.0
+    app.kubernetes.io/name: chathistory-usvc
+    app.kubernetes.io/instance: chathistory-usvc
+    app.kubernetes.io/version: "v1.0"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: chathistory-usvc
+      app.kubernetes.io/instance: chathistory-usvc
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: chathistory-usvc
+        app.kubernetes.io/instance: chathistory-usvc
+    spec:
+      securityContext:
+        {}
+      containers:
+        - name: chathistory-usvc
+          envFrom:
+            - configMapRef:
+                name: chathistory-usvc-config
+            - configMapRef:
+                name: extra-env-config
+                optional: true
+          securityContext:
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop:
+              - ALL
+            readOnlyRootFilesystem: false
+            runAsNonRoot: true
+            runAsUser: 1000
+            seccompProfile:
+              type: RuntimeDefault
+          image: "opea/chathistory-mongo-server:latest"
+          imagePullPolicy: IfNotPresent
+          ports:
+            - name: port
+              containerPort: 6012
+              protocol: TCP
+          volumeMounts:
+            - mountPath: /tmp
+              name: tmp
+          livenessProbe:
+            failureThreshold: 24
+            httpGet:
+              path: /v1/health_check
+              port: port
+            initialDelaySeconds: 5
+            periodSeconds: 5
+          readinessProbe:
+            httpGet:
+              path: /v1/health_check
+              port: port
+            initialDelaySeconds: 5
+            periodSeconds: 5
+          startupProbe:
+            failureThreshold: 120
+            httpGet:
+              path: /v1/health_check
+              port: port
+            initialDelaySeconds: 5
+            periodSeconds: 5
+          resources:
+            {}
+      volumes:
+        - name: tmp
+          emptyDir: {}
diff --git a/microservices-connector/config/manifests/mongodb.yaml b/microservices-connector/config/manifests/mongodb.yaml
new file mode 100644
index 00000000..9bf8389d
--- /dev/null
+++ b/microservices-connector/config/manifests/mongodb.yaml
@@ -0,0 +1,90 @@
+---
+# Source: mongodb/templates/service.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: mongodb
+  labels:
+    helm.sh/chart: mongodb-1.0.0
+    app.kubernetes.io/name: mongodb
+    app.kubernetes.io/instance: mongodb
+    app.kubernetes.io/version: "7.0.11"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  type: ClusterIP
+  ports:
+    - port: 27017
+      targetPort: 27017
+      protocol: TCP
+      name: mongodb
+  selector:
+    app.kubernetes.io/name: mongodb
+    app.kubernetes.io/instance: mongodb
+---
+# Source: mongodb/templates/deployment.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: mongodb
+  labels:
+    helm.sh/chart: mongodb-1.0.0
+    app.kubernetes.io/name: mongodb
+    app.kubernetes.io/instance: mongodb
+    app.kubernetes.io/version: "7.0.11"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: mongodb
+      app.kubernetes.io/instance: mongodb
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: mongodb
+        app.kubernetes.io/instance: mongodb
+    spec:
+      securityContext:
+        {}
+      containers:
+        - name: mongodb
+          securityContext:
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop:
+              - ALL
+            readOnlyRootFilesystem: true
+            runAsNonRoot: false
+            runAsUser: 999
+            seccompProfile:
+              type: RuntimeDefault
+          image: "mongo:7.0.11"
+          imagePullPolicy: IfNotPresent
+          volumeMounts:
+            - mountPath: /data
+              name: data-volume
+            - mountPath: /tmp
+              name: tmp
+          ports:
+            - name: port
+              containerPort: 27017
+              protocol: TCP
+          startupProbe:
+            failureThreshold: 120
+            initialDelaySeconds: 5
+            periodSeconds: 5
+            tcpSocket:
+              port: port
+          resources:
+            {}
+      volumes:
+        - name: data-volume
+          emptyDir: {}
+        - name: tmp
+          emptyDir: {}
diff --git a/microservices-connector/config/manifests/prompt-usvc.yaml b/microservices-connector/config/manifests/prompt-usvc.yaml
new file mode 100644
index 00000000..4c5ee30f
--- /dev/null
+++ b/microservices-connector/config/manifests/prompt-usvc.yaml
@@ -0,0 +1,130 @@
+---
+# Source: prompt-usvc/templates/configmap.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: prompt-usvc-config
+  labels:
+    helm.sh/chart: prompt-usvc-1.0.0
+    app.kubernetes.io/name: prompt-usvc
+    app.kubernetes.io/instance: prompt-usvc
+    app.kubernetes.io/version: "v1.0"
+    app.kubernetes.io/managed-by: Helm
+data:
+  MONGO_HOST: "prompt-usvc-mongodb"
+  MONGO_PORT: "27017"
+  DB_NAME: "OPEA"
+  COLLECTION_NAME: "Prompt"
+  http_proxy: ""
+  https_proxy: ""
+  no_proxy: ""
+  LOGFLAG: ""
+---
+# Source: prompt-usvc/templates/service.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: prompt-usvc
+  labels:
+    helm.sh/chart: prompt-usvc-1.0.0
+    app.kubernetes.io/name: prompt-usvc
+    app.kubernetes.io/instance: prompt-usvc
+    app.kubernetes.io/version: "v1.0"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  type: ClusterIP
+  ports:
+    - port: 6018
+      targetPort: 6018
+      protocol: TCP
+      name: port
+  selector:
+    app.kubernetes.io/name: prompt-usvc
+    app.kubernetes.io/instance: prompt-usvc
+---
+# Source: prompt-usvc/templates/deployment.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: prompt-usvc
+  labels:
+    helm.sh/chart: prompt-usvc-1.0.0
+    app.kubernetes.io/name: prompt-usvc
+    app.kubernetes.io/instance: prompt-usvc
+    app.kubernetes.io/version: "v1.0"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: prompt-usvc
+      app.kubernetes.io/instance: prompt-usvc
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: prompt-usvc
+        app.kubernetes.io/instance: prompt-usvc
+    spec:
+      securityContext:
+        {}
+      containers:
+        - name: prompt-usvc
+          envFrom:
+            - configMapRef:
+                name: prompt-usvc-config
+            - configMapRef:
+                name: extra-env-config
+                optional: true
+          securityContext:
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop:
+              - ALL
+            readOnlyRootFilesystem: false
+            runAsNonRoot: true
+            runAsUser: 1000
+            seccompProfile:
+              type: RuntimeDefault
+          image: "opea/promptregistry-mongo-server:latest"
+          imagePullPolicy: IfNotPresent
+          ports:
+            - name: port
+              containerPort: 6018
+              protocol: TCP
+          volumeMounts:
+            - mountPath: /tmp
+              name: tmp
+          livenessProbe:
+            failureThreshold: 24
+            httpGet:
+              path: /v1/health_check
+              port: port
+            initialDelaySeconds: 5
+            periodSeconds: 5
+          readinessProbe:
+            httpGet:
+              path: /v1/health_check
+              port: port
+            initialDelaySeconds: 5
+            periodSeconds: 5
+          startupProbe:
+            failureThreshold: 120
+            httpGet:
+              path: /v1/health_check
+              port: port
+            initialDelaySeconds: 5
+            periodSeconds: 5
+          resources:
+            {}
+      volumes:
+        - name: tmp
+          emptyDir: {}
diff --git a/microservices-connector/config/manifests/vllm.yaml b/microservices-connector/config/manifests/vllm.yaml
new file mode 100644
index 00000000..4eed5cd4
--- /dev/null
+++ b/microservices-connector/config/manifests/vllm.yaml
@@ -0,0 +1,147 @@
+---
+# Source: vllm/templates/configmap.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: vllm-config
+  labels:
+    helm.sh/chart: vllm-1.0.0
+    app.kubernetes.io/name: vllm
+    app.kubernetes.io/instance: vllm
+    app.kubernetes.io/version: "0.5"
+    app.kubernetes.io/managed-by: Helm
+data:
+  HF_TOKEN: "insert-your-huggingface-token-here"
+  http_proxy: ""
+  https_proxy: ""
+  no_proxy: ""
+  HABANA_LOGS: "/tmp/habana_logs"
+  NUMBA_CACHE_DIR: "/tmp"
+  HF_HOME: "/tmp/.cache/huggingface"
+  # https://github.com/outlines-dev/outlines/blob/main/outlines/caching.py#L14-L29
+  OUTLINES_CACHE_DIR: "/tmp/.cache/outlines"
+---
+# Source: vllm/templates/service.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: vllm
+  labels:
+    helm.sh/chart: vllm-1.0.0
+    app.kubernetes.io/name: vllm
+    app.kubernetes.io/instance: vllm
+    app.kubernetes.io/version: "0.5"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  type: ClusterIP
+  ports:
+    - port: 80
+      targetPort: 2080
+      protocol: TCP
+      name: vllm
+  selector:
+    app.kubernetes.io/name: vllm
+    app.kubernetes.io/instance: vllm
+---
+# Source: vllm/templates/deployment.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: vllm
+  labels:
+    helm.sh/chart: vllm-1.0.0
+    app.kubernetes.io/name: vllm
+    app.kubernetes.io/instance: vllm
+    app.kubernetes.io/version: "0.5"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: vllm
+      app.kubernetes.io/instance: vllm
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: vllm
+        app.kubernetes.io/instance: vllm
+    spec:
+      securityContext:
+        {}
+      containers:
+        - name: vllm
+          envFrom:
+            - configMapRef:
+                name: vllm-config
+            - configMapRef:
+                name: extra-env-config
+                optional: true
+          securityContext:
+            {}
+          image: "opea/vllm:latest"
+          imagePullPolicy: IfNotPresent
+          args:
+            - "--enforce-eager"
+            - "--dtype"
+            - "auto"
+            - "--model"
+            - "Intel/neural-chat-7b-v3-3"
+            - "--host"
+            - "0.0.0.0"
+            - "--port"
+            - "2080"
+            - "--download-dir"
+            - "/data"
+          volumeMounts:
+            - mountPath: /data
+              name: model-volume
+            - mountPath: /dev/shm
+              name: shm
+            - mountPath: /tmp
+              name: tmp
+          ports:
+            - name: http
+              containerPort: 2080
+              protocol: TCP
+          livenessProbe:
+            failureThreshold: 24
+            httpGet:
+              path: /health
+              port: http
+            initialDelaySeconds: 5
+            periodSeconds: 5
+          readinessProbe:
+            httpGet:
+              path: /health
+              port: http
+            initialDelaySeconds: 5
+            periodSeconds: 5
+          startupProbe:
+            failureThreshold: 120
+            httpGet:
+              path: /health
+              port: http
+            initialDelaySeconds: 5
+            periodSeconds: 5
+          resources:
+            {}
+      volumes:
+        - name: model-volume
+          hostPath:
+            path: /mnt/opea-models
+            type: Directory
+        - name: shm
+          emptyDir:
+            medium: Memory
+            sizeLimit: 1Gi
+        - name: tmp
+          emptyDir: {}
diff --git a/microservices-connector/config/manifests/vllm_gaudi.yaml b/microservices-connector/config/manifests/vllm_gaudi.yaml
new file mode 100644
index 00000000..64f9f094
--- /dev/null
+++ b/microservices-connector/config/manifests/vllm_gaudi.yaml
@@ -0,0 +1,141 @@
+---
+# Source: vllm/templates/configmap.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: vllm-config
+  labels:
+    helm.sh/chart: vllm-1.0.0
+    app.kubernetes.io/name: vllm
+    app.kubernetes.io/instance: vllm
+    app.kubernetes.io/version: "0.5"
+    app.kubernetes.io/managed-by: Helm
+data:
+  HF_TOKEN: "insert-your-huggingface-token-here"
+  http_proxy: ""
+  https_proxy: ""
+  no_proxy: ""
+  HABANA_LOGS: "/tmp/habana_logs"
+  NUMBA_CACHE_DIR: "/tmp"
+  HF_HOME: "/tmp/.cache/huggingface"
+  # https://github.com/outlines-dev/outlines/blob/main/outlines/caching.py#L14-L29
+  OUTLINES_CACHE_DIR: "/tmp/.cache/outlines"
+  VLLM_CPU_KVCACHE_SPACE: "40"
+---
+# Source: vllm/templates/service.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: vllm
+  labels:
+    helm.sh/chart: vllm-1.0.0
+    app.kubernetes.io/name: vllm
+    app.kubernetes.io/instance: vllm
+    app.kubernetes.io/version: "0.5"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  type: ClusterIP
+  ports:
+    - port: 80
+      targetPort: 2080
+      protocol: TCP
+      name: vllm
+  selector:
+    app.kubernetes.io/name: vllm
+    app.kubernetes.io/instance: vllm
+---
+# Source: vllm/templates/deployment.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: vllm
+  labels:
+    helm.sh/chart: vllm-1.0.0
+    app.kubernetes.io/name: vllm
+    app.kubernetes.io/instance: vllm
+    app.kubernetes.io/version: "0.5"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: vllm
+      app.kubernetes.io/instance: vllm
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: vllm
+        app.kubernetes.io/instance: vllm
+    spec:
+      securityContext:
+        {}
+      containers:
+        - name: vllm
+          envFrom:
+            - configMapRef:
+                name: vllm-config
+            - configMapRef:
+                name: extra-env-config
+                optional: true
+          securityContext:
+            {}
+          image: "opea/llm-vllm-hpu:latest"
+          imagePullPolicy: IfNotPresent
+          args:
+            - "/bin/bash"
+            - "-c"
+            - "python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model Intel/neural-chat-7b-v3-3 --tensor-parallel-size 1 --host 0.0.0.0 --port 2080 --download-dir /data --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048"
+          volumeMounts:
+            - mountPath: /data
+              name: model-volume
+            - mountPath: /dev/shm
+              name: shm
+            - mountPath: /tmp
+              name: tmp
+          ports:
+            - name: http
+              containerPort: 2080
+              protocol: TCP
+          livenessProbe:
+            failureThreshold: 24
+            httpGet:
+              path: /health
+              port: http
+            initialDelaySeconds: 5
+            periodSeconds: 5
+          readinessProbe:
+            httpGet:
+              path: /health
+              port: http
+            initialDelaySeconds: 5
+            periodSeconds: 5
+          startupProbe:
+            failureThreshold: 120
+            httpGet:
+              path: /health
+              port: http
+            initialDelaySeconds: 5
+            periodSeconds: 5
+          resources:
+            limits:
+              habana.ai/gaudi: 1
+      volumes:
+        - name: model-volume
+          hostPath:
+            path: /mnt/opea-models
+            type: Directory
+        - name: shm
+          emptyDir:
+            medium: Memory
+            sizeLimit: 1Gi
+        - name: tmp
+          emptyDir: {}