diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index c218b4470..31d21ee02 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -19,7 +19,7 @@ repos:
     hooks:
       - id: end-of-file-fixer
         files: \.(mps|json|yaml|yml|txt)$
-        exclude: ^datasets/.*\.(mps|json|yaml|yml|txt)$
+        exclude: ^(datasets|helmchart)/.*\.(mps|json|yaml|yml|txt)$
       - id: trailing-whitespace
         files: \.(mps|json|yaml|yml|txt)$
         exclude: ^datasets/.*\.(mps|json|yaml|yml|txt)$
@@ -27,6 +27,8 @@ repos:
       - id: check-executables-have-shebangs
       - id: check-json
       - id: check-yaml
+        files: \.(yaml)$
+        exclude: ^(helmchart)/.*\.(yaml)$
   - repo: https://github.com/PyCQA/isort
     rev: 5.12.0
     hooks:
diff --git a/ci/release/update-version-cuopt.sh b/ci/release/update-version-cuopt.sh
index 356562399..3e2216fa4 100755
--- a/ci/release/update-version-cuopt.sh
+++ b/ci/release/update-version-cuopt.sh
@@ -110,6 +110,11 @@ sed_runner "s/\(cuopt-sh-client==\)[0-9]\+\.[0-9]\+\.\\*/\1${PY_NEXT_SHORT_TAG}.
 sed_runner 's/cuopt-server=[0-9][0-9].[0-9][0-9] cuopt-sh-client=[0-9][0-9].[0-9][0-9] python=[0-9].[0-9][0-9] cuda-version=[0-9][0-9].[0-9]/cuopt-server='${NEXT_SHORT_TAG}' cuopt-sh-client='${NEXT_SHORT_TAG}' python=3.12 cuda-version=12.8/g' README.md
 sed_runner 's|cuopt:[0-9]\{2\}\.[0-9]\{1,2\}\.[0-9]\+\(-cuda12\.8-\)\(py[0-9]\+\)|cuopt:'"${DOCKER_TAG}"'\1\2|g' README.md
 
+# Update Helm chart files
+sed_runner 's/\(tag: "\)[0-9][0-9]\.[0-9]\+\.[0-9]\+\(-cuda12\.8-py3\.12"\)/\1'${DOCKER_TAG}'\2/g' helmchart/cuopt-server/values.yaml
+sed_runner 's/\(appVersion: \)[0-9][0-9]\.[0-9]\+\.[0-9]\+/\1'${DOCKER_TAG}'/g' helmchart/cuopt-server/Chart.yaml
+sed_runner 's/\(version: \)[0-9][0-9]\.[0-9]\+\.[0-9]\+/\1'${DOCKER_TAG}'/g' helmchart/cuopt-server/Chart.yaml
+
 DEPENDENCIES=(
   libcuopt
   cuopt
diff --git a/helmchart/cuopt-server/Chart.yaml b/helmchart/cuopt-server/Chart.yaml
new file mode 100644
index 000000000..8fa749529
--- /dev/null
+++ b/helmchart/cuopt-server/Chart.yaml
@@ -0,0 +1,17 @@
+apiVersion: v2
+appVersion: 25.8.0
+description: A Helm chart for NVIDIA cuOpt Server with GPU support
+home: https://docs.nvidia.com/cuopt/user-guide/latest/resources.html
+keywords:
+- nvidia
+- cuopt
+- optimization
+- gpu
+maintainers:
+- email: cuopt@nvidia.com
+  name: cuopt-maintainer
+name: cuopt-server
+sources:
+- https://docs.nvidia.com/cuopt/user-guide/latest/resources.html
+type: application
+version: 25.8.0
diff --git a/helmchart/cuopt-server/README.md b/helmchart/cuopt-server/README.md
new file mode 100644
index 000000000..0b1a1cec2
--- /dev/null
+++ b/helmchart/cuopt-server/README.md
@@ -0,0 +1,66 @@
+# cuOpt Server Helm Chart
+
+This Helm chart deploys the NVIDIA cuOpt Server with GPU support on Kubernetes.
+
+## Prerequisites
+
+- Kubernetes cluster with GPU nodes
+- NVIDIA device plugin installed on the cluster
+- NVIDIA GPU Operator (recommended) or manual GPU driver installation
+- Helm 3.x installed
+
+## Selecting the Container Image
+
+- To use a specific version of the cuOpt server, update the `image.tag` field in `values.yaml`.
+- If the desired version is not available as a release, you may use a nightly image.
+- All available container tags can be found on [Docker Hub](https://hub.docker.com/r/nvidia/cuopt/tags).
+## Installation
+
+### 1. Add the chart repository (if publishing to a repository)
+```bash
+helm repo add cuopt-server https://your-repo-url
+helm repo update
+```
+
+### 2. Install the chart
+```bash
+# Install with default values
+helm install cuopt-server ./cuopt-server
+
+# Install with custom values
+helm install cuopt-server ./cuopt-server -f custom-values.yaml
+
+# Install with inline overrides
+helm install cuopt-server ./cuopt-server \
+  --set resources.requests.nvidia.com/gpu=2 \
+  --set resources.limits.nvidia.com/gpu=2
+```
+
+## Usage
+
+### Port Forwarding (for ClusterIP service)
+```bash
+kubectl port-forward service/cuopt-server 5000:5000
+```
+
+### Accessing the Service
+Once deployed, you can access the cuOpt server API at:
+- `http://localhost:5000` (with port forwarding)
+- Or through the service endpoint within the cluster
+
+### Testing the Deployment
+```bash
+# Check pod status
+kubectl get pods -l app.kubernetes.io/name=cuopt-server
+
+# View logs
+kubectl logs -l app.kubernetes.io/name=cuopt-server
+
+# Check GPU allocation
+kubectl describe pod -l app.kubernetes.io/name=cuopt-server
+```
+
+## Uninstall
+
+```bash
+helm uninstall cuopt-server
\ No newline at end of file
diff --git a/helmchart/cuopt-server/templates/NOTES.txt b/helmchart/cuopt-server/templates/NOTES.txt
new file mode 100644
index 000000000..6e258353e
--- /dev/null
+++ b/helmchart/cuopt-server/templates/NOTES.txt
@@ -0,0 +1,34 @@
+1. Get the application URL by running these commands:
+{{- if .Values.ingress.enabled }}
+{{- range $host := .Values.ingress.hosts }}
+  {{- range .paths }}
+  http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
+  {{- end }}
+{{- end }}
+{{- else if contains "NodePort" .Values.service.type }}
+  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "cuopt-server.fullname" . }})
+  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
+  echo http://$NODE_IP:$NODE_PORT
+{{- else if contains "LoadBalancer" .Values.service.type }}
+     NOTE: It may take a few minutes for the LoadBalancer IP to be available.
+           You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "cuopt-server.fullname" . }}'
+  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "cuopt-server.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
+  echo http://$SERVICE_IP:{{ .Values.service.port }}
+{{- else if contains "ClusterIP" .Values.service.type }}
+  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "{{ include "cuopt-server.selectorLabels" . }}" -o jsonpath="{.items[0].metadata.name}")
+  export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
+  echo "Visit http://127.0.0.1:5000 to use your application"
+  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 5000:$CONTAINER_PORT
+{{- end }}
+
+2. To check the status of your cuOpt server deployment:
+   kubectl get pods -l "{{ include "cuopt-server.selectorLabels" . }}"
+
+3. To view the logs:
+   kubectl logs -l "{{ include "cuopt-server.selectorLabels" . }}"
+
+4. Important Notes:
+   - This deployment requires GPU nodes in your cluster
+   - Make sure your cluster has the NVIDIA device plugin installed
+   - The server will be running the command: python -m cuopt_server.cuopt_service
+   - Port 5000 is exposed for the cuOpt service API
\ No newline at end of file
diff --git a/helmchart/cuopt-server/templates/_helpers.tpl b/helmchart/cuopt-server/templates/_helpers.tpl
new file mode 100644
index 000000000..d35eb8082
--- /dev/null
+++ b/helmchart/cuopt-server/templates/_helpers.tpl
@@ -0,0 +1,62 @@
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "cuopt-server.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "cuopt-server.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "cuopt-server.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels
+*/}}
+{{- define "cuopt-server.labels" -}}
+helm.sh/chart: {{ include "cuopt-server.chart" . }}
+{{ include "cuopt-server.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels
+*/}}
+{{- define "cuopt-server.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "cuopt-server.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Create the name of the service account to use
+*/}}
+{{- define "cuopt-server.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default (include "cuopt-server.fullname" .) .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/helmchart/cuopt-server/templates/deployment.yaml b/helmchart/cuopt-server/templates/deployment.yaml
new file mode 100644
index 000000000..5e51f5882
--- /dev/null
+++ b/helmchart/cuopt-server/templates/deployment.yaml
@@ -0,0 +1,88 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "cuopt-server.fullname" . }}
+  labels:
+    {{- include "cuopt-server.labels" . | nindent 4 }}
+spec:
+  {{- if not .Values.autoscaling.enabled }}
+  replicas: {{ .Values.replicaCount }}
+  {{- end }}
+  selector:
+    matchLabels:
+      {{- include "cuopt-server.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "cuopt-server.selectorLabels" . | nindent 8 }}
+    spec:
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      serviceAccountName: {{ include "cuopt-server.serviceAccountName" . }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: {{ .Chart.Name }}
+          securityContext:
+            {{- toYaml .Values.securityContext | nindent 12 }}
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          {{- if .Values.command }}
+          command:
+            {{- toYaml .Values.command | nindent 12 }}
+          {{- end }}
+          ports:
+            - name: http
+              containerPort: {{ .Values.service.targetPort }}
+              protocol: TCP
+          livenessProbe:
+            httpGet:
+              path: /v2/health/live
+              port: http
+            initialDelaySeconds: 30
+            periodSeconds: 10
+            timeoutSeconds: 5
+            failureThreshold: 3
+          readinessProbe:
+            httpGet:
+              path: /v2/health/ready
+              port: http
+            initialDelaySeconds: 10
+            periodSeconds: 5
+            timeoutSeconds: 3
+            failureThreshold: 3
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
+          {{- with .Values.env }}
+          env:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          {{- if .Values.persistence.enabled }}
+          volumeMounts:
+            - name: storage
+              mountPath: /data
+          {{- end }}
+      {{- if .Values.persistence.enabled }}
+      volumes:
+        - name: storage
+          persistentVolumeClaim:
+            claimName: {{ include "cuopt-server.fullname" . }}-pvc
+      {{- end }}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
diff --git a/helmchart/cuopt-server/templates/ingress.yaml b/helmchart/cuopt-server/templates/ingress.yaml
new file mode 100644
index 000000000..97a98fd36
--- /dev/null
+++ b/helmchart/cuopt-server/templates/ingress.yaml
@@ -0,0 +1,59 @@
+{{- if .Values.ingress.enabled -}}
+{{- $fullName := include "cuopt-server.fullname" . -}}
+{{- $svcPort := .Values.service.port -}}
+{{- if and .Values.ingress.className (not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class")) }}
+  {{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}}
+{{- end }}
+{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
+apiVersion: networking.k8s.io/v1
+{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
+apiVersion: networking.k8s.io/v1beta1
+{{- else -}}
+apiVersion: extensions/v1beta1
+{{- end }}
+kind: Ingress
+metadata:
+  name: {{ $fullName }}
+  labels:
+    {{- include "cuopt-server.labels" . | nindent 4 }}
+  {{- with .Values.ingress.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  {{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }}
+  ingressClassName: {{ .Values.ingress.className }}
+  {{- end }}
+  {{- if .Values.ingress.tls }}
+  tls:
+    {{- range .Values.ingress.tls }}
+    - hosts:
+        {{- range .hosts }}
+        - {{ . | quote }}
+        {{- end }}
+      secretName: {{ .secretName }}
+    {{- end }}
+  {{- end }}
+  rules:
+    {{- range .Values.ingress.hosts }}
+    - host: {{ .host | quote }}
+      http:
+        paths:
+          {{- range .paths }}
+          - path: {{ .path }}
+            {{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }}
+            pathType: {{ .pathType }}
+            {{- end }}
+            backend:
+              {{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }}
+              service:
+                name: {{ $fullName }}
+                port:
+                  number: {{ $svcPort }}
+              {{- else }}
+              serviceName: {{ $fullName }}
+              servicePort: {{ $svcPort }}
+              {{- end }}
+          {{- end }}
+    {{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/helmchart/cuopt-server/templates/service.yaml b/helmchart/cuopt-server/templates/service.yaml
new file mode 100644
index 000000000..9327414be
--- /dev/null
+++ b/helmchart/cuopt-server/templates/service.yaml
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "cuopt-server.fullname" . }}
+  labels:
+    {{- include "cuopt-server.labels" . | nindent 4 }}
+spec:
+  type: {{ .Values.service.type }}
+  ports:
+    - port: {{ .Values.service.port }}
+      targetPort: {{ .Values.service.targetPort }}
+      protocol: TCP
+      name: http
+  selector:
+    {{- include "cuopt-server.selectorLabels" . | nindent 4 }}
\ No newline at end of file
diff --git a/helmchart/cuopt-server/templates/serviceaccount.yaml b/helmchart/cuopt-server/templates/serviceaccount.yaml
new file mode 100644
index 000000000..a49d5ce20
--- /dev/null
+++ b/helmchart/cuopt-server/templates/serviceaccount.yaml
@@ -0,0 +1,12 @@
+{{- if .Values.serviceAccount.create -}}
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: {{ include "cuopt-server.serviceAccountName" . }}
+  labels:
+    {{- include "cuopt-server.labels" . | nindent 4 }}
+  {{- with .Values.serviceAccount.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/helmchart/cuopt-server/values.yaml b/helmchart/cuopt-server/values.yaml
new file mode 100644
index 000000000..450865862
--- /dev/null
+++ b/helmchart/cuopt-server/values.yaml
@@ -0,0 +1,97 @@
+# Default values for cuopt-server.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+replicaCount: 1
+
+image:
+  repository: nvidia/cuopt
+  pullPolicy: IfNotPresent
+  tag: "25.8.0-cuda12.8-py3.12"
+
+imagePullSecrets: []
+nameOverride: ""
+fullnameOverride: ""
+
+serviceAccount:
+  # Specifies whether a service account should be created
+  create: true
+  # Annotations to add to the service account
+  annotations: {}
+  # The name of the service account to use.
+  # If not set and create is true, a name is generated using the fullname template
+  name: ""
+
+podAnnotations: {}
+
+podSecurityContext: {}
+  # fsGroup: 2000
+
+securityContext: {}
+  # capabilities:
+  #   drop:
+  #   - ALL
+  # readOnlyRootFilesystem: true
+  # runAsNonRoot: true
+  # runAsUser: 1000
+
+service:
+  type: ClusterIP
+  port: 5000
+  targetPort: 5000
+
+ingress:
+  enabled: false
+  className: ""
+  annotations: {}
+    # kubernetes.io/ingress.class: nginx
+    # kubernetes.io/tls-acme: "true"
+  hosts:
+    - host: cuopt-server.local
+      paths:
+        - path: /
+          pathType: Prefix
+  tls: []
+  #  - secretName: cuopt-server-tls
+  #    hosts:
+  #      - cuopt-server.local
+
+resources:
+  limits:
+    nvidia.com/gpu: 1
+
+  requests:
+    nvidia.com/gpu: 1
+
+autoscaling:
+  enabled: false
+  minReplicas: 1
+  maxReplicas: 100
+  targetCPUUtilizationPercentage: 80
+  # targetMemoryUtilizationPercentage: 80
+
+nodeSelector:
+  # Uncomment to schedule on nodes with GPU
+  # accelerator: nvidia-tesla-k80
+
+tolerations: []
+
+affinity: {}
+
+# Command to run the cuOpt service
+command:
+  - python
+  - -m
+  - cuopt_server.cuopt_service
+  - -p
+  - "5000"
+
+# Environment variables
+env: []
+
+# Persistent storage (if needed)
+persistence:
+  enabled: false
+  accessMode: ReadWriteOnce
+  size: 1Gi
+  # storageClass: ""