Update ai-stack chart to use pvc and secret (#13)

* Update ai-stack chart to use pvc and secret Signed-off-by: Sanket Sudake <sanketsudake@gmail.com> * add reranker support in ai-stack Signed-off-by: Sanket Sudake <sanketsudake@gmail.com> --------- Signed-off-by: Sanket Sudake <sanketsudake@gmail.com>
infracloudio · Jul 18, 2024 · e914691 · e914691
1 parent b103c9d
commit e914691
Show file tree

Hide file tree

Showing 11 changed files with 228 additions and 12 deletions.
diff --git a/charts/ai-stack/Chart.lock b/charts/ai-stack/Chart.lock
@@ -1,9 +1,12 @@
 dependencies:
 - name: text-generation-inference
   repository: https://infracloudio.github.io/charts
-  version: 0.1.1
+  version: 0.1.3
 - name: text-embeddings-inference
   repository: https://infracloudio.github.io/charts
-  version: 0.1.1
-digest: sha256:03b9860baccbba01ab701b07ab16fd12c6558b3a1920f806b623bef2cc5d03bd
-generated: "2024-07-16T15:49:37.371821+05:30"
+  version: 0.1.3
+- name: chromadb
+  repository: https://amikos-tech.github.io/chromadb-chart/
+  version: 0.1.19
+digest: sha256:05ba005e3d493eccad2f831ffa151ee9de81b24597c4f347741d8cb1624feec8
+generated: "2024-07-17T13:32:27.60585+05:30"
diff --git a/charts/ai-stack/Chart.yaml b/charts/ai-stack/Chart.yaml
@@ -15,7 +15,7 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.1.0
+version: 0.2.0
 
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to
@@ -25,13 +25,25 @@ appVersion: ""
 
 dependencies:
   - name: text-generation-inference
-    version: 0.1.1
+    version: 0.1.3
     repository: "https://infracloudio.github.io/charts"
     alias: tgi
     condition: tgi.enabled
-  
+
   - name: text-embeddings-inference
-    version: 0.1.1
+    version: 0.1.3
     repository: "https://infracloudio.github.io/charts"
     alias: tei
-    condition: tei.enabled
+    condition: tei.enabled
+
+  # Second instance of the text-embeddings-inference chart. The alias makes it
+  # configurable under the `reranker` key in values.yaml.
+  - name: text-embeddings-inference
+    version: 0.1.3
+    repository: "https://infracloudio.github.io/charts"
+    alias: reranker
+    condition: reranker.enabled
+
+  # ChromaDB chart, configured under the `vectordb` key in values.yaml.
+  - name: chromadb
+    version: 0.1.19
+    repository: "https://amikos-tech.github.io/chromadb-chart/"
+    alias: vectordb
+    condition: vectordb.enabled
diff --git a/charts/ai-stack/README.md b/charts/ai-stack/README.md
@@ -20,6 +20,12 @@ See [helm repo](https://helm.sh/docs/helm/helm_repo/) for command documentation.
 
 ## Installing the Chart
 
+Create a secret containing your Hugging Face token. The default values read it from the secret `hf-api-token`, key `HF_API_TOKEN`:
+
+```bash
+kubectl create secret generic hf-api-token --from-literal HF_API_TOKEN=<your-huggingface-token>
+```
+
 To install the chart with the release name `ai-stack`:
 
 ```bash

diff --git a/charts/ai-stack/charts/chromadb-0.1.19.tgz b/charts/ai-stack/charts/chromadb-0.1.19.tgz
diff --git a/charts/ai-stack/charts/text-embeddings-inference-0.1.1.tgz b/charts/ai-stack/charts/text-embeddings-inference-0.1.1.tgz
diff --git a/charts/ai-stack/charts/text-embeddings-inference-0.1.3.tgz b/charts/ai-stack/charts/text-embeddings-inference-0.1.3.tgz
diff --git a/charts/ai-stack/charts/text-generation-inference-0.1.1.tgz b/charts/ai-stack/charts/text-generation-inference-0.1.1.tgz
diff --git a/charts/ai-stack/charts/text-generation-inference-0.1.3.tgz b/charts/ai-stack/charts/text-generation-inference-0.1.3.tgz
diff --git a/charts/ai-stack/templates/_helpers.tpl b/charts/ai-stack/templates/_helpers.tpl
@@ -0,0 +1,62 @@
+{{/*
+Short chart name: .Values.nameOverride when set, otherwise the chart's own name.
+The result is cut to 63 characters and a trailing "-" is removed.
+*/}}
+{{- define "ai-stack.name" -}}
+{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Fully qualified app name.
+Precedence:
+  1. .Values.fullnameOverride, when set.
+  2. The release name alone, when it already contains the chart name (or nameOverride).
+  3. "<release name>-<chart name or nameOverride>".
+Every result is cut to 63 characters, because some Kubernetes name fields are
+limited to this by the DNS naming spec, and a trailing "-" is removed.
+*/}}
+{{- define "ai-stack.fullname" -}}
+{{- with .Values.fullnameOverride }}
+{{- . | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $base := .Values.nameOverride | default .Chart.Name }}
+{{- $release := .Release.Name }}
+{{- if $release | contains $base }}
+{{- $release | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" $release $base | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+"<chart name>-<chart version>" for the helm.sh/chart label.
+"+" is replaced with "_", the result is cut to 63 characters and a trailing "-" is removed.
+*/}}
+{{- define "ai-stack.chart" -}}
+{{- $nameVersion := printf "%s-%s" .Chart.Name .Chart.Version }}
+{{- $nameVersion | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels: the chart label, the selector labels, the app version
+(only when the chart sets one) and the value of .Release.Service.
+*/}}
+{{- define "ai-stack.labels" -}}
+helm.sh/chart: {{ include "ai-stack.chart" . }}
+{{ include "ai-stack.selectorLabels" . }}
+{{- with .Chart.AppVersion }}
+app.kubernetes.io/version: {{ quote . }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels: the chart name (see "ai-stack.name") and the release name.
+*/}}
+{{- define "ai-stack.selectorLabels" -}}
+{{- $appName := include "ai-stack.name" . -}}
+app.kubernetes.io/name: {{ $appName }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Create the name of the service account to use.
+When serviceAccount.create is true the name defaults to the chart fullname;
+otherwise it defaults to "default". serviceAccount.name overrides either default.
+A missing .Values.serviceAccount section is treated as an empty map, so
+including this helper does not fail with a nil-pointer error.
+*/}}
+{{- define "ai-stack.serviceAccountName" -}}
+{{- $sa := default (dict) .Values.serviceAccount }}
+{{- if $sa.create }}
+{{- default (include "ai-stack.fullname" .) $sa.name }}
+{{- else }}
+{{- default "default" $sa.name }}
+{{- end }}
+{{- end }}
diff --git a/charts/ai-stack/templates/pvc.yaml b/charts/ai-stack/templates/pvc.yaml
@@ -0,0 +1,24 @@
+{{- /*
+PersistentVolumeClaim backing the shared HuggingFace cache.
+Rendered only when huggingface.cache.enabled is true and
+huggingface.cache.existingClaim is empty.
+*/ -}}
+{{- if and .Values.huggingface.cache.enabled (not .Values.huggingface.cache.existingClaim) }}
+{{- $pvc := .Values.huggingface.cache.persistentVolumeClaim }}
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: {{ $pvc.name | quote }}
+  labels:
+    {{- include "ai-stack.labels" . | nindent 4 }}
+    {{- with $pvc.labels }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+  {{- /* Emit the annotations key only when annotations are configured. */}}
+  {{- with $pvc.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  {{- /* Leave storageClassName out when unset instead of rendering an empty value. */}}
+  {{- with $pvc.storageClassName }}
+  storageClassName: {{ . | quote }}
+  {{- end }}
+  accessModes:
+    {{- toYaml $pvc.accessModes | nindent 4 }}
+  resources:
+    requests:
+      storage: {{ $pvc.size | quote }}
+{{- end }}
diff --git a/charts/ai-stack/values.yaml b/charts/ai-stack/values.yaml
@@ -2,7 +2,8 @@
 # This is a YAML-formatted file.
 # Declare variables to be passed into your templates.
 
-# Values for the text-generation-inference chart
+# Values for tei: the text-embeddings-inference chart
+# Reference: https://artifacthub.io/packages/helm/infracloud-charts/text-embeddings-inference?modal=values
 tei:
   enabled: true
 
@@ -14,6 +15,11 @@ tei:
       value: "1024"
     - name: RUST_BACKTRACE
       value: "full"
+    - name: HF_API_TOKEN
+      valueFrom:
+        secretKeyRef:
+          name: hf-api-token
+          key: HF_API_TOKEN
 
   resources:
     limits:
@@ -28,7 +34,18 @@ tei:
     type: LoadBalancer
     port: 80
 
-# Values for the text-embeddings-inference chart
+  volumeMounts:
+    - name: hf-cache
+      mountPath: /data
+
+  volumes:
+    - name: hf-cache
+      persistentVolumeClaim:
+        claimName: hf-cache
+
+
+# Values for tgi: the text-generation-inference chart
+# Reference: https://artifacthub.io/packages/helm/infracloud-charts/text-generation-inference?modal=values
 tgi:
   enabled: true
 
@@ -40,6 +57,11 @@ tgi:
       value: "6144"
     - name: MAX_TOTAL_TOKENS
       value: "8192"
+    - name: HF_API_TOKEN
+      valueFrom:
+        secretKeyRef:
+          name: hf-api-token
+          key: HF_API_TOKEN
 
   resources:
     limits:
@@ -52,4 +74,91 @@ tgi:
 
   service:
     type: LoadBalancer
-    port: 80
+    port: 80
+
+  volumeMounts:
+    - name: hf-cache
+      mountPath: /data
+    # Mount the in-memory `shm` volume declared under `volumes`; without a
+    # mount the volume is defined but never used by the container.
+    # NOTE(review): confirm the subchart does not already mount /dev/shm.
+    - name: shm
+      mountPath: /dev/shm
+
+  volumes:
+    # claimName must match huggingface.cache.persistentVolumeClaim.name.
+    - name: hf-cache
+      persistentVolumeClaim:
+        claimName: hf-cache
+    - name: shm
+      emptyDir:
+        medium: Memory
+        sizeLimit: "1Gi"
+
+
+# Values for vectordb: the chromadb chart (aliased as `vectordb` in Chart.yaml)
+# Reference: https://artifacthub.io/packages/helm/chromadb-helm/chromadb?modal=values
+vectordb:
+  # Toggles the dependency via the `vectordb.enabled` condition in Chart.yaml.
+  enabled: true
+
+  service:
+    type: LoadBalancer
+
+# Settings for the HuggingFace cache PersistentVolumeClaim rendered by
+# templates/pvc.yaml.
+huggingface:
+
+  cache:
+    # The PVC is rendered only when this is true and existingClaim is empty.
+    enabled: true
+
+    # When non-empty, templates/pvc.yaml does not render a PVC.
+    existingClaim: ""
+
+    persistentVolumeClaim:
+      ## Ref: https://kubernetes.io/docs/concepts/storage/persistent-volumes/
+      accessModes:
+        - ReadWriteMany
+
+      # Name of the rendered PVC. The tei, tgi and reranker `volumes` entries
+      # reference `claimName: hf-cache`, so keep them in sync with this value.
+      name: "hf-cache"
+
+      # Extra labels added alongside the chart's common labels.
+      labels: {}
+
+      # Annotations set on the PVC.
+      annotations:
+        nfs.io/storage-path: "hf/hub"
+
+      storageClassName: "controller-nfs"
+
+      # Requested storage size for the PVC.
+      size: "50Gi"
+
+
+# Values for reranker: the text-embeddings-inference chart (aliased as `reranker` in Chart.yaml)
+# Reference: https://artifacthub.io/packages/helm/infracloud-charts/text-embeddings-inference?modal=values
+reranker:
+  # Toggles the dependency via the `reranker.enabled` condition in Chart.yaml.
+  enabled: true
+
+  config:
+    modelID: "BAAI/bge-reranker-large"
+
+  env:
+    - name: MAX_CLIENT_BATCH_SIZE
+      value: "1024"
+    - name: RUST_BACKTRACE
+      value: "full"
+    # Token is read from the `hf-api-token` secret, which must exist before
+    # the chart is installed (see the README).
+    - name: HF_API_TOKEN
+      valueFrom:
+        secretKeyRef:
+          name: hf-api-token
+          key: HF_API_TOKEN
+
+  resources:
+    limits:
+      nvidia.com/gpu: 1
+    requests:
+      nvidia.com/gpu: 1
+
+  strategy:
+    type: Recreate
+
+  service:
+    type: LoadBalancer
+    port: 80
+
+  volumeMounts:
+    - name: hf-cache
+      mountPath: /data
+
+  volumes:
+    # claimName must match huggingface.cache.persistentVolumeClaim.name.
+    - name: hf-cache
+      persistentVolumeClaim:
+        claimName: hf-cache