Skip to content

Commit

Permalink
Update ai-stack chart to use pvc and secret (#13)
Browse files Browse the repository at this point in the history
* Update ai-stack chart to use pvc and secret

Signed-off-by: Sanket Sudake <sanketsudake@gmail.com>

* add reranker support in ai-stack

Signed-off-by: Sanket Sudake <sanketsudake@gmail.com>

---------

Signed-off-by: Sanket Sudake <sanketsudake@gmail.com>
  • Loading branch information
sanketsudake authored Jul 18, 2024
1 parent b103c9d commit e914691
Show file tree
Hide file tree
Showing 11 changed files with 228 additions and 12 deletions.
11 changes: 7 additions & 4 deletions charts/ai-stack/Chart.lock
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
dependencies:
- name: text-generation-inference
repository: https://infracloudio.github.io/charts
version: 0.1.1
version: 0.1.3
- name: text-embeddings-inference
repository: https://infracloudio.github.io/charts
version: 0.1.1
digest: sha256:03b9860baccbba01ab701b07ab16fd12c6558b3a1920f806b623bef2cc5d03bd
generated: "2024-07-16T15:49:37.371821+05:30"
version: 0.1.3
- name: chromadb
repository: https://amikos-tech.github.io/chromadb-chart/
version: 0.1.19
digest: sha256:05ba005e3d493eccad2f831ffa151ee9de81b24597c4f347741d8cb1624feec8
generated: "2024-07-17T13:32:27.60585+05:30"
22 changes: 17 additions & 5 deletions charts/ai-stack/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0
version: 0.2.0

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
Expand All @@ -25,13 +25,25 @@ appVersion: ""

dependencies:
- name: text-generation-inference
version: 0.1.1
version: 0.1.3
repository: "https://infracloudio.github.io/charts"
alias: tgi
condition: tgi.enabled

- name: text-embeddings-inference
version: 0.1.1
version: 0.1.3
repository: "https://infracloudio.github.io/charts"
alias: tei
condition: tei.enabled
condition: tei.enabled

- name: text-embeddings-inference
version: 0.1.3
repository: "https://infracloudio.github.io/charts"
alias: reranker
condition: reranker.enabled

- name: chromadb
version: 0.1.19
repository: https://amikos-tech.github.io/chromadb-chart/
alias: vectordb
condition: vectordb.enabled
6 changes: 6 additions & 0 deletions charts/ai-stack/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ See [helm repo](https://helm.sh/docs/helm/helm_repo/) for command documentation.

## Installing the Chart

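Create a secret containing your Hugging Face token (the chart's default values expect a secret named `hf-api-token` with the key `HF_API_TOKEN`):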

```bash
kubectl create secret generic hf-api-token --from-literal HF_API_TOKEN=<your-huggingface-token>
```

To install the chart with the release name `ai-stack`:

```bash
Expand Down
Binary file added charts/ai-stack/charts/chromadb-0.1.19.tgz
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
62 changes: 62 additions & 0 deletions charts/ai-stack/templates/_helpers.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
{{/*
Short chart name.
Uses .Values.nameOverride when it is set, otherwise .Chart.Name; the result is
cut to 63 characters and any trailing "-" is removed.
*/}}
{{- define "ai-stack.name" -}}
{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Fully qualified app name, limited to 63 characters (some Kubernetes name
fields are restricted to this length by the DNS naming spec).
Resolution order:
  1. .Values.fullnameOverride, when set;
  2. the release name alone, when it already contains the chart name;
  3. "<release name>-<chart name>" otherwise.
The chart name here is .Values.nameOverride when set, else .Chart.Name.
*/}}
{{- define "ai-stack.fullname" -}}
{{- $base := .Values.nameOverride | default .Chart.Name }}
{{- $full := printf "%s-%s" .Release.Name $base }}
{{- if .Values.fullnameOverride }}
{{- $full = .Values.fullnameOverride }}
{{- else if contains $base .Release.Name }}
{{- $full = .Release.Name }}
{{- end }}
{{- $full | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Value for the helm.sh/chart label: "<chart name>-<chart version>".
"+" is replaced with "_" before the result is cut to 63 characters and any
trailing "-" is removed.
*/}}
{{- define "ai-stack.chart" -}}
{{- $nameAndVersion := printf "%s-%s" .Chart.Name .Chart.Version }}
{{- $nameAndVersion | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels
Emits the full label set for resources rendered by this chart:
  - helm.sh/chart: chart name and version, from "ai-stack.chart"
  - the selector labels, from "ai-stack.selectorLabels"
  - app.kubernetes.io/version: only when .Chart.AppVersion is non-empty
  - app.kubernetes.io/managed-by: .Release.Service
The output is not indented; callers indent it themselves (e.g. `| nindent 4`).
*/}}
{{- define "ai-stack.labels" -}}
helm.sh/chart: {{ include "ai-stack.chart" . }}
{{ include "ai-stack.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Selector labels
Emits app.kubernetes.io/name (from "ai-stack.name") and
app.kubernetes.io/instance (the release name). This pair is also embedded in
the common labels produced by "ai-stack.labels".
*/}}
{{- define "ai-stack.selectorLabels" -}}
app.kubernetes.io/name: {{ include "ai-stack.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Create the name of the service account to use.
  - serviceAccount.create is truthy: serviceAccount.name, falling back to the
    chart's full name ("ai-stack.fullname").
  - otherwise: serviceAccount.name, falling back to "default".
The serviceAccount values block is optional: when it is absent it is treated
as an empty map, so including this helper does not fail with a nil-pointer
error on charts whose values.yaml defines no serviceAccount section.
*/}}
{{- define "ai-stack.serviceAccountName" -}}
{{- $serviceAccount := .Values.serviceAccount | default dict }}
{{- if $serviceAccount.create }}
{{- default (include "ai-stack.fullname" .) $serviceAccount.name }}
{{- else }}
{{- default "default" $serviceAccount.name }}
{{- end }}
{{- end }}
24 changes: 24 additions & 0 deletions charts/ai-stack/templates/pvc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{{/*
PersistentVolumeClaim for the shared HuggingFace cache.
Rendered only when huggingface.cache.enabled is true and no
huggingface.cache.existingClaim is supplied.
*/}}
{{- if .Values.huggingface.cache.enabled }}
{{- if not .Values.huggingface.cache.existingClaim }}
{{- $pvc := .Values.huggingface.cache.persistentVolumeClaim }}
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ $pvc.name | quote }}
  labels:
    {{- include "ai-stack.labels" . | nindent 4 }}
    {{- with $pvc.labels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
  {{- /* Emit the annotations key only when there is something to put under it. */}}
  {{- with $pvc.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  {{- /* An empty storageClassName is omitted so the cluster default class applies. */}}
  {{- if $pvc.storageClassName }}
  storageClassName: {{ $pvc.storageClassName | quote }}
  {{- end }}
  accessModes:
    {{- toYaml $pvc.accessModes | nindent 4 }}
  resources:
    requests:
      storage: {{ $pvc.size | quote }}
{{- end }}
{{- end }}
115 changes: 112 additions & 3 deletions charts/ai-stack/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# Values for the text-generation-inference chart
# Values for tei: the text-embeddings-inference chart
# Reference: https://artifacthub.io/packages/helm/infracloud-charts/text-embeddings-inference?modal=values
tei:
enabled: true

Expand All @@ -14,6 +15,11 @@ tei:
value: "1024"
- name: RUST_BACKTRACE
value: "full"
- name: HF_API_TOKEN
valueFrom:
secretKeyRef:
name: hf-api-token
key: HF_API_TOKEN

resources:
limits:
Expand All @@ -28,7 +34,18 @@ tei:
type: LoadBalancer
port: 80

# Values for the text-embeddings-inference chart
volumeMounts:
- name: hf-cache
mountPath: /data

volumes:
- name: hf-cache
persistentVolumeClaim:
claimName: hf-cache


# Values for tgi: the text-generation-inference chart
# Reference: https://artifacthub.io/packages/helm/infracloud-charts/text-generation-inference?modal=values
tgi:
enabled: true

Expand All @@ -40,6 +57,11 @@ tgi:
value: "6144"
- name: MAX_TOTAL_TOKENS
value: "8192"
- name: HF_API_TOKEN
valueFrom:
secretKeyRef:
name: hf-api-token
key: HF_API_TOKEN

resources:
limits:
Expand All @@ -52,4 +74,91 @@ tgi:

service:
type: LoadBalancer
port: 80
port: 80

volumeMounts:
- name: hf-cache
mountPath: /data

volumes:
- name: hf-cache
persistentVolumeClaim:
claimName: hf-cache
- name: shm
emptyDir:
medium: Memory
sizeLimit: "1Gi"


# Values for vectordb: the chromadb chart
# Reference: https://artifacthub.io/packages/helm/chromadb-helm/chromadb?modal=values
vectordb:
enabled: true

service:
type: LoadBalancer

huggingface:

cache:
enabled: true

existingClaim: ""

persistentVolumeClaim:
## Ref: https://kubernetes.io/docs/concepts/storage/persistent-volumes/
accessModes:
- ReadWriteMany

name: "hf-cache"

labels: {}

annotations:
nfs.io/storage-path: "hf/hub"

storageClassName: "controller-nfs"

size: "50Gi"


# Values for reranker: the text-embeddings-inference chart (aliased as reranker)
# Reference: https://artifacthub.io/packages/helm/infracloud-charts/text-embeddings-inference?modal=values
reranker:
enabled: true

config:
modelID: "BAAI/bge-reranker-large"

env:
- name: MAX_CLIENT_BATCH_SIZE
value: "1024"
- name: RUST_BACKTRACE
value: "full"
- name: HF_API_TOKEN
valueFrom:
secretKeyRef:
name: hf-api-token
key: HF_API_TOKEN

resources:
limits:
nvidia.com/gpu: 1
requests:
nvidia.com/gpu: 1

strategy:
type: Recreate

service:
type: LoadBalancer
port: 80

volumeMounts:
- name: hf-cache
mountPath: /data

volumes:
- name: hf-cache
persistentVolumeClaim:
claimName: hf-cache

0 comments on commit e914691

Please sign in to comment.